Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/acl.c | 6
-rw-r--r--  fs/9p/fid.c | 4
-rw-r--r--  fs/9p/v9fs.h | 7
-rw-r--r--  fs/9p/vfs_dentry.c | 4
-rw-r--r--  fs/9p/vfs_file.c | 13
-rw-r--r--  fs/9p/vfs_inode.c | 29
-rw-r--r--  fs/9p/vfs_inode_dotl.c | 24
-rw-r--r--  fs/9p/vfs_super.c | 82
-rw-r--r--  fs/adfs/adfs.h | 25
-rw-r--r--  fs/adfs/dir_f.c | 23
-rw-r--r--  fs/adfs/dir_fplus.c | 119
-rw-r--r--  fs/adfs/inode.c | 63
-rw-r--r--  fs/adfs/map.c | 2
-rw-r--r--  fs/adfs/super.c | 23
-rw-r--r--  fs/affs/Makefile | 2
-rw-r--r--  fs/affs/file.c | 2
-rw-r--r--  fs/afs/cache.c | 12
-rw-r--r--  fs/afs/cell.c | 2
-rw-r--r--  fs/aio.c | 81
-rw-r--r--  fs/attr.c | 6
-rw-r--r--  fs/autofs4/autofs_i.h | 2
-rw-r--r--  fs/autofs4/dev-ioctl.c | 4
-rw-r--r--  fs/autofs4/expire.c | 84
-rw-r--r--  fs/autofs4/root.c | 64
-rw-r--r--  fs/autofs4/waitq.c | 6
-rw-r--r--  fs/befs/ChangeLog | 10
-rw-r--r--  fs/befs/befs_fs_types.h | 2
-rw-r--r--  fs/befs/btree.c | 2
-rw-r--r--  fs/befs/linuxvfs.c | 3
-rw-r--r--  fs/bfs/dir.c | 2
-rw-r--r--  fs/bfs/file.c | 1
-rw-r--r--  fs/binfmt_elf.c | 10
-rw-r--r--  fs/binfmt_flat.c | 2
-rw-r--r--  fs/bio-integrity.c | 3
-rw-r--r--  fs/bio.c | 14
-rw-r--r--  fs/block_dev.c | 35
-rw-r--r--  fs/btrfs/acl.c | 11
-rw-r--r--  fs/btrfs/btrfs_inode.h | 3
-rw-r--r--  fs/btrfs/compression.c | 17
-rw-r--r--  fs/btrfs/ctree.c | 159
-rw-r--r--  fs/btrfs/ctree.h | 32
-rw-r--r--  fs/btrfs/delayed-ref.c | 6
-rw-r--r--  fs/btrfs/dir-item.c | 45
-rw-r--r--  fs/btrfs/disk-io.c | 217
-rw-r--r--  fs/btrfs/extent-tree.c | 354
-rw-r--r--  fs/btrfs/extent_io.c | 87
-rw-r--r--  fs/btrfs/extent_io.h | 3
-rw-r--r--  fs/btrfs/extent_map.c | 2
-rw-r--r--  fs/btrfs/file-item.c | 5
-rw-r--r--  fs/btrfs/file.c | 391
-rw-r--r--  fs/btrfs/free-space-cache.c | 713
-rw-r--r--  fs/btrfs/free-space-cache.h | 2
-rw-r--r--  fs/btrfs/inode-map.c | 3
-rw-r--r--  fs/btrfs/inode.c | 557
-rw-r--r--  fs/btrfs/ioctl.c | 112
-rw-r--r--  fs/btrfs/ordered-data.c | 8
-rw-r--r--  fs/btrfs/relocation.c | 10
-rw-r--r--  fs/btrfs/root-tree.c | 24
-rw-r--r--  fs/btrfs/super.c | 66
-rw-r--r--  fs/btrfs/transaction.c | 64
-rw-r--r--  fs/btrfs/transaction.h | 4
-rw-r--r--  fs/btrfs/tree-log.c | 57
-rw-r--r--  fs/btrfs/volumes.c | 235
-rw-r--r--  fs/btrfs/volumes.h | 12
-rw-r--r--  fs/btrfs/xattr.c | 35
-rw-r--r--  fs/btrfs/zlib.c | 3
-rw-r--r--  fs/buffer.c | 53
-rw-r--r--  fs/cachefiles/interface.c | 2
-rw-r--r--  fs/ceph/addr.c | 4
-rw-r--r--  fs/ceph/caps.c | 2
-rw-r--r--  fs/ceph/debugfs.c | 6
-rw-r--r--  fs/ceph/dir.c | 24
-rw-r--r--  fs/ceph/file.c | 10
-rw-r--r--  fs/ceph/inode.c | 25
-rw-r--r--  fs/ceph/mds_client.c | 6
-rw-r--r--  fs/ceph/snap.c | 6
-rw-r--r--  fs/ceph/super.c | 11
-rw-r--r--  fs/ceph/super.h | 66
-rw-r--r--  fs/cifs/AUTHORS | 2
-rw-r--r--  fs/cifs/README | 16
-rw-r--r--  fs/cifs/cache.c | 2
-rw-r--r--  fs/cifs/cifs_debug.c | 43
-rw-r--r--  fs/cifs/cifs_dfs_ref.c | 2
-rw-r--r--  fs/cifs/cifs_spnego.c | 4
-rw-r--r--  fs/cifs/cifs_unicode.c | 35
-rw-r--r--  fs/cifs/cifs_unicode.h | 2
-rw-r--r--  fs/cifs/cifsencrypt.c | 21
-rw-r--r--  fs/cifs/cifsfs.c | 6
-rw-r--r--  fs/cifs/cifsglob.h | 13
-rw-r--r--  fs/cifs/cifssmb.c | 16
-rw-r--r--  fs/cifs/connect.c | 72
-rw-r--r--  fs/cifs/dir.c | 2
-rw-r--r--  fs/cifs/file.c | 100
-rw-r--r--  fs/cifs/link.c | 4
-rw-r--r--  fs/cifs/misc.c | 3
-rw-r--r--  fs/cifs/sess.c | 23
-rw-r--r--  fs/coda/Makefile | 2
-rw-r--r--  fs/coda/sysctl.c | 17
-rw-r--r--  fs/compat.c | 3
-rw-r--r--  fs/configfs/dir.c | 2
-rw-r--r--  fs/dcache.c | 89
-rw-r--r--  fs/devpts/inode.c | 21
-rw-r--r--  fs/direct-io.c | 7
-rw-r--r--  fs/dlm/lock.c | 2
-rw-r--r--  fs/dlm/lowcomms.c | 2
-rw-r--r--  fs/dlm/recover.c | 2
-rw-r--r--  fs/drop_caches.c | 24
-rw-r--r--  fs/ecryptfs/crypto.c | 23
-rw-r--r--  fs/ecryptfs/ecryptfs_kernel.h | 37
-rw-r--r--  fs/ecryptfs/file.c | 34
-rw-r--r--  fs/ecryptfs/inode.c | 84
-rw-r--r--  fs/ecryptfs/keystore.c | 272
-rw-r--r--  fs/ecryptfs/kthread.c | 6
-rw-r--r--  fs/ecryptfs/main.c | 82
-rw-r--r--  fs/ecryptfs/mmap.c | 61
-rw-r--r--  fs/ecryptfs/read_write.c | 12
-rw-r--r--  fs/ecryptfs/super.c | 17
-rw-r--r--  fs/efs/inode.c | 1
-rw-r--r--  fs/eventpoll.c | 60
-rw-r--r--  fs/exec.c | 2
-rw-r--r--  fs/exofs/common.h | 22
-rw-r--r--  fs/exofs/dir.c | 33
-rw-r--r--  fs/exofs/exofs.h | 6
-rw-r--r--  fs/exofs/file.c | 16
-rw-r--r--  fs/exofs/inode.c | 52
-rw-r--r--  fs/exofs/super.c | 190
-rw-r--r--  fs/ext2/acl.c | 2
-rw-r--r--  fs/ext2/balloc.c | 6
-rw-r--r--  fs/ext2/ext2.h | 6
-rw-r--r--  fs/ext2/inode.c | 10
-rw-r--r--  fs/ext2/ioctl.c | 6
-rw-r--r--  fs/ext2/super.c | 2
-rw-r--r--  fs/ext2/xattr.c | 2
-rw-r--r--  fs/ext3/acl.c | 2
-rw-r--r--  fs/ext3/balloc.c | 10
-rw-r--r--  fs/ext3/inode.c | 11
-rw-r--r--  fs/ext3/ioctl.c | 6
-rw-r--r--  fs/ext3/resize.c | 2
-rw-r--r--  fs/ext3/super.c | 2
-rw-r--r--  fs/ext4/acl.c | 2
-rw-r--r--  fs/ext4/balloc.c | 5
-rw-r--r--  fs/ext4/ext4.h | 12
-rw-r--r--  fs/ext4/ext4_jbd2.h | 11
-rw-r--r--  fs/ext4/extents.c | 223
-rw-r--r--  fs/ext4/fsync.c | 33
-rw-r--r--  fs/ext4/ialloc.c | 8
-rw-r--r--  fs/ext4/inode.c | 467
-rw-r--r--  fs/ext4/ioctl.c | 15
-rw-r--r--  fs/ext4/mballoc.c | 36
-rw-r--r--  fs/ext4/mballoc.h | 2
-rw-r--r--  fs/ext4/migrate.c | 12
-rw-r--r--  fs/ext4/namei.c | 13
-rw-r--r--  fs/ext4/page-io.c | 16
-rw-r--r--  fs/ext4/resize.c | 12
-rw-r--r--  fs/ext4/super.c | 126
-rw-r--r--  fs/ext4/xattr.c | 4
-rw-r--r--  fs/fat/inode.c | 1
-rw-r--r--  fs/fcntl.c | 2
-rw-r--r--  fs/fhandle.c | 1
-rw-r--r--  fs/fifo.c | 3
-rw-r--r--  fs/filesystems.c | 3
-rw-r--r--  fs/freevxfs/vxfs_fshead.c | 2
-rw-r--r--  fs/freevxfs/vxfs_lookup.c | 2
-rw-r--r--  fs/freevxfs/vxfs_olt.h | 2
-rw-r--r--  fs/freevxfs/vxfs_subr.c | 1
-rw-r--r--  fs/fs-writeback.c | 143
-rw-r--r--  fs/fuse/cuse.c | 12
-rw-r--r--  fs/fuse/dev.c | 27
-rw-r--r--  fs/fuse/dir.c | 38
-rw-r--r--  fs/fuse/file.c | 4
-rw-r--r--  fs/fuse/fuse_i.h | 1
-rw-r--r--  fs/fuse/inode.c | 1
-rw-r--r--  fs/generic_acl.c | 2
-rw-r--r--  fs/gfs2/Makefile | 2
-rw-r--r--  fs/gfs2/aops.c | 5
-rw-r--r--  fs/gfs2/bmap.c | 2
-rw-r--r--  fs/gfs2/dir.c | 2
-rw-r--r--  fs/gfs2/file.c | 60
-rw-r--r--  fs/gfs2/glock.c | 8
-rw-r--r--  fs/gfs2/glops.c | 4
-rw-r--r--  fs/gfs2/inode.c | 56
-rw-r--r--  fs/gfs2/inode.h | 3
-rw-r--r--  fs/gfs2/log.c | 4
-rw-r--r--  fs/gfs2/lops.c | 12
-rw-r--r--  fs/gfs2/meta_io.c | 3
-rw-r--r--  fs/gfs2/ops_fstype.c | 2
-rw-r--r--  fs/gfs2/rgrp.c | 4
-rw-r--r--  fs/gfs2/super.c | 16
-rw-r--r--  fs/hfs/inode.c | 2
-rw-r--r--  fs/hfsplus/inode.c | 2
-rw-r--r--  fs/hfsplus/ioctl.c | 2
-rw-r--r--  fs/hpfs/file.c | 1
-rw-r--r--  fs/hugetlbfs/inode.c | 3
-rw-r--r--  fs/inode.c | 687
-rw-r--r--  fs/internal.h | 8
-rw-r--r--  fs/ioctl.c | 21
-rw-r--r--  fs/isofs/inode.c | 1
-rw-r--r--  fs/jbd/commit.c | 24
-rw-r--r--  fs/jbd/journal.c | 4
-rw-r--r--  fs/jbd/revoke.c | 2
-rw-r--r--  fs/jbd/transaction.c | 2
-rw-r--r--  fs/jbd2/commit.c | 28
-rw-r--r--  fs/jbd2/journal.c | 7
-rw-r--r--  fs/jbd2/revoke.c | 2
-rw-r--r--  fs/jbd2/transaction.c | 2
-rw-r--r--  fs/jffs2/TODO | 2
-rw-r--r--  fs/jffs2/acl.c | 2
-rw-r--r--  fs/jffs2/compr_zlib.c | 7
-rw-r--r--  fs/jffs2/readinode.c | 2
-rw-r--r--  fs/jffs2/summary.c | 4
-rw-r--r--  fs/jffs2/wbuf.c | 2
-rw-r--r--  fs/jffs2/xattr.c | 2
-rw-r--r--  fs/jfs/Makefile | 2
-rw-r--r--  fs/jfs/inode.c | 1
-rw-r--r--  fs/jfs/ioctl.c | 2
-rw-r--r--  fs/jfs/jfs_dmap.c | 4
-rw-r--r--  fs/jfs/jfs_extent.c | 6
-rw-r--r--  fs/jfs/jfs_imap.c | 14
-rw-r--r--  fs/jfs/jfs_logmgr.h | 2
-rw-r--r--  fs/jfs/jfs_metapage.c | 1
-rw-r--r--  fs/jfs/jfs_metapage.h | 2
-rw-r--r--  fs/jfs/jfs_txnmgr.c | 2
-rw-r--r--  fs/jfs/resize.c | 4
-rw-r--r--  fs/jfs/super.c | 2
-rw-r--r--  fs/jfs/xattr.c | 2
-rw-r--r--  fs/locks.c | 12
-rw-r--r--  fs/logfs/compr.c | 2
-rw-r--r--  fs/logfs/dev_bdev.c | 2
-rw-r--r--  fs/logfs/dev_mtd.c | 2
-rw-r--r--  fs/logfs/dir.c | 2
-rw-r--r--  fs/logfs/file.c | 2
-rw-r--r--  fs/logfs/inode.c | 2
-rw-r--r--  fs/logfs/readwrite.c | 2
-rw-r--r--  fs/mbcache.c | 2
-rw-r--r--  fs/minix/Kconfig | 8
-rw-r--r--  fs/minix/inode.c | 1
-rw-r--r--  fs/minix/minix.h | 74
-rw-r--r--  fs/mpage.c | 8
-rw-r--r--  fs/namei.c | 56
-rw-r--r--  fs/namespace.c | 18
-rw-r--r--  fs/ncpfs/Makefile | 2
-rw-r--r--  fs/ncpfs/inode.c | 2
-rw-r--r--  fs/nfs/callback_xdr.c | 2
-rw-r--r--  fs/nfs/dir.c | 89
-rw-r--r--  fs/nfs/file.c | 5
-rw-r--r--  fs/nfs/getroot.c | 4
-rw-r--r--  fs/nfs/inode.c | 10
-rw-r--r--  fs/nfs/internal.h | 27
-rw-r--r--  fs/nfs/namespace.c | 113
-rw-r--r--  fs/nfs/nfs3proc.c | 2
-rw-r--r--  fs/nfs/nfs4_fs.h | 5
-rw-r--r--  fs/nfs/nfs4filelayout.c | 352
-rw-r--r--  fs/nfs/nfs4filelayout.h | 4
-rw-r--r--  fs/nfs/nfs4filelayoutdev.c | 178
-rw-r--r--  fs/nfs/nfs4proc.c | 300
-rw-r--r--  fs/nfs/nfs4state.c | 3
-rw-r--r--  fs/nfs/nfs4xdr.c | 313
-rw-r--r--  fs/nfs/pagelist.c | 12
-rw-r--r--  fs/nfs/pnfs.c | 142
-rw-r--r--  fs/nfs/pnfs.h | 83
-rw-r--r--  fs/nfs/proc.c | 2
-rw-r--r--  fs/nfs/write.c | 233
-rw-r--r--  fs/nfs_common/nfsacl.c | 3
-rw-r--r--  fs/nfsd/export.c | 1
-rw-r--r--  fs/nfsd/lockd.c | 1
-rw-r--r--  fs/nfsd/nfs3xdr.c | 2
-rw-r--r--  fs/nfsd/nfs4idmap.c | 1
-rw-r--r--  fs/nfsd/nfs4proc.c | 4
-rw-r--r--  fs/nfsd/nfs4state.c | 170
-rw-r--r--  fs/nfsd/nfs4xdr.c | 5
-rw-r--r--  fs/nfsd/nfsctl.c | 35
-rw-r--r--  fs/nfsd/nfsxdr.c | 2
-rw-r--r--  fs/nfsd/state.h | 12
-rw-r--r--  fs/nfsd/vfs.c | 11
-rw-r--r--  fs/nilfs2/alloc.c | 12
-rw-r--r--  fs/nilfs2/alloc.h | 2
-rw-r--r--  fs/nilfs2/bmap.c | 12
-rw-r--r--  fs/nilfs2/bmap.h | 3
-rw-r--r--  fs/nilfs2/btnode.c | 7
-rw-r--r--  fs/nilfs2/btree.c | 6
-rw-r--r--  fs/nilfs2/dir.c | 5
-rw-r--r--  fs/nilfs2/direct.c | 4
-rw-r--r--  fs/nilfs2/file.c | 15
-rw-r--r--  fs/nilfs2/gcinode.c | 1
-rw-r--r--  fs/nilfs2/inode.c | 84
-rw-r--r--  fs/nilfs2/ioctl.c | 115
-rw-r--r--  fs/nilfs2/mdt.c | 9
-rw-r--r--  fs/nilfs2/mdt.h | 2
-rw-r--r--  fs/nilfs2/namei.c | 2
-rw-r--r--  fs/nilfs2/nilfs.h | 47
-rw-r--r--  fs/nilfs2/page.c | 5
-rw-r--r--  fs/nilfs2/page.h | 3
-rw-r--r--  fs/nilfs2/recovery.c | 32
-rw-r--r--  fs/nilfs2/sb.h | 85
-rw-r--r--  fs/nilfs2/segbuf.c | 2
-rw-r--r--  fs/nilfs2/segment.c | 258
-rw-r--r--  fs/nilfs2/segment.h | 14
-rw-r--r--  fs/nilfs2/super.c | 214
-rw-r--r--  fs/nilfs2/the_nilfs.c | 44
-rw-r--r--  fs/nilfs2/the_nilfs.h | 51
-rw-r--r--  fs/notify/fanotify/fanotify_user.c | 2
-rw-r--r--  fs/notify/inode_mark.c | 42
-rw-r--r--  fs/notify/inotify/inotify_fsnotify.c | 3
-rw-r--r--  fs/notify/inotify/inotify_user.c | 39
-rw-r--r--  fs/notify/mark.c | 3
-rw-r--r--  fs/notify/vfsmount_mark.c | 1
-rw-r--r--  fs/ntfs/Makefile | 19
-rw-r--r--  fs/ntfs/aops.c | 4
-rw-r--r--  fs/ntfs/attrib.c | 4
-rw-r--r--  fs/ntfs/compress.c | 5
-rw-r--r--  fs/ntfs/inode.c | 8
-rw-r--r--  fs/ntfs/layout.h | 12
-rw-r--r--  fs/ntfs/logfile.c | 2
-rw-r--r--  fs/ntfs/logfile.h | 2
-rw-r--r--  fs/ntfs/mft.c | 8
-rw-r--r--  fs/ntfs/runlist.c | 2
-rw-r--r--  fs/ntfs/super.c | 14
-rw-r--r--  fs/ocfs2/Makefile | 4
-rw-r--r--  fs/ocfs2/acl.c | 3
-rw-r--r--  fs/ocfs2/alloc.c | 216
-rw-r--r--  fs/ocfs2/aops.c | 83
-rw-r--r--  fs/ocfs2/aops.h | 2
-rw-r--r--  fs/ocfs2/buffer_head_io.c | 49
-rw-r--r--  fs/ocfs2/cluster/heartbeat.c | 9
-rw-r--r--  fs/ocfs2/cluster/masklog.c | 20
-rw-r--r--  fs/ocfs2/cluster/masklog.h | 105
-rw-r--r--  fs/ocfs2/cluster/quorum.c | 4
-rw-r--r--  fs/ocfs2/cluster/tcp.c | 12
-rw-r--r--  fs/ocfs2/dcache.c | 45
-rw-r--r--  fs/ocfs2/dir.c | 121
-rw-r--r--  fs/ocfs2/dlm/Makefile | 2
-rw-r--r--  fs/ocfs2/dlm/dlmconvert.c | 6
-rw-r--r--  fs/ocfs2/dlm/dlmdomain.c | 36
-rw-r--r--  fs/ocfs2/dlm/dlmlock.c | 10
-rw-r--r--  fs/ocfs2/dlm/dlmmaster.c | 10
-rw-r--r--  fs/ocfs2/dlm/dlmrecovery.c | 9
-rw-r--r--  fs/ocfs2/dlm/dlmunlock.c | 4
-rw-r--r--  fs/ocfs2/dlmfs/Makefile | 2
-rw-r--r--  fs/ocfs2/dlmglue.c | 246
-rw-r--r--  fs/ocfs2/export.c | 47
-rw-r--r--  fs/ocfs2/extent_map.c | 10
-rw-r--r--  fs/ocfs2/file.c | 220
-rw-r--r--  fs/ocfs2/heartbeat.c | 4
-rw-r--r--  fs/ocfs2/inode.c | 138
-rw-r--r--  fs/ocfs2/ioctl.c | 43
-rw-r--r--  fs/ocfs2/journal.c | 170
-rw-r--r--  fs/ocfs2/journal.h | 2
-rw-r--r--  fs/ocfs2/localalloc.c | 109
-rw-r--r--  fs/ocfs2/locks.c | 1
-rw-r--r--  fs/ocfs2/mmap.c | 7
-rw-r--r--  fs/ocfs2/namei.c | 177
-rw-r--r--  fs/ocfs2/ocfs2.h | 33
-rw-r--r--  fs/ocfs2/ocfs2_fs.h | 4
-rw-r--r--  fs/ocfs2/ocfs2_trace.h | 2739
-rw-r--r--  fs/ocfs2/quota_global.c | 47
-rw-r--r--  fs/ocfs2/quota_local.c | 16
-rw-r--r--  fs/ocfs2/refcounttree.c | 158
-rw-r--r--  fs/ocfs2/reservations.c | 57
-rw-r--r--  fs/ocfs2/reservations.h | 2
-rw-r--r--  fs/ocfs2/resize.c | 23
-rw-r--r--  fs/ocfs2/slot_map.c | 16
-rw-r--r--  fs/ocfs2/stackglue.h | 2
-rw-r--r--  fs/ocfs2/suballoc.c | 193
-rw-r--r--  fs/ocfs2/super.c | 91
-rw-r--r--  fs/ocfs2/symlink.c | 14
-rw-r--r--  fs/ocfs2/sysfile.c | 1
-rw-r--r--  fs/ocfs2/uptodate.c | 73
-rw-r--r--  fs/ocfs2/xattr.c | 159
-rw-r--r--  fs/omfs/file.c | 1
-rw-r--r--  fs/open.c | 13
-rw-r--r--  fs/partitions/check.c | 7
-rw-r--r--  fs/partitions/ldm.c | 16
-rw-r--r--  fs/proc/array.c | 4
-rw-r--r--  fs/proc/base.c | 189
-rw-r--r--  fs/proc/generic.c | 8
-rw-r--r--  fs/proc/inode.c | 2
-rw-r--r--  fs/proc/internal.h | 1
-rw-r--r--  fs/proc/root.c | 32
-rw-r--r--  fs/proc/task_mmu.c | 138
-rw-r--r--  fs/proc/task_nommu.c | 6
-rw-r--r--  fs/pstore/Kconfig | 2
-rw-r--r--  fs/pstore/inode.c | 76
-rw-r--r--  fs/pstore/internal.h | 3
-rw-r--r--  fs/pstore/platform.c | 31
-rw-r--r--  fs/qnx4/inode.c | 1
-rw-r--r--  fs/quota/dquot.c | 56
-rw-r--r--  fs/ramfs/file-nommu.c | 1
-rw-r--r--  fs/reiserfs/Makefile | 4
-rw-r--r--  fs/reiserfs/inode.c | 1
-rw-r--r--  fs/reiserfs/ioctl.c | 4
-rw-r--r--  fs/reiserfs/journal.c | 4
-rw-r--r--  fs/reiserfs/lock.c | 2
-rw-r--r--  fs/reiserfs/super.c | 4
-rw-r--r--  fs/reiserfs/xattr.c | 2
-rw-r--r--  fs/reiserfs/xattr_acl.c | 2
-rw-r--r--  fs/select.c | 3
-rw-r--r--  fs/squashfs/Kconfig | 12
-rw-r--r--  fs/squashfs/cache.c | 4
-rw-r--r--  fs/squashfs/decompressor.c | 34
-rw-r--r--  fs/squashfs/decompressor.h | 7
-rw-r--r--  fs/squashfs/dir.c | 9
-rw-r--r--  fs/squashfs/lzo_wrapper.c | 4
-rw-r--r--  fs/squashfs/namei.c | 12
-rw-r--r--  fs/squashfs/squashfs.h | 1
-rw-r--r--  fs/squashfs/squashfs_fs.h | 4
-rw-r--r--  fs/squashfs/super.c | 15
-rw-r--r--  fs/squashfs/xz_wrapper.c | 53
-rw-r--r--  fs/squashfs/zlib_wrapper.c | 10
-rw-r--r--  fs/super.c | 2
-rw-r--r--  fs/sync.c | 28
-rw-r--r--  fs/sysv/itree.c | 1
-rw-r--r--  fs/ubifs/Kconfig | 11
-rw-r--r--  fs/ubifs/budget.c | 2
-rw-r--r--  fs/ubifs/commit.c | 2
-rw-r--r--  fs/ubifs/debug.c | 65
-rw-r--r--  fs/ubifs/debug.h | 152
-rw-r--r--  fs/ubifs/file.c | 14
-rw-r--r--  fs/ubifs/ioctl.c | 2
-rw-r--r--  fs/ubifs/lprops.c | 2
-rw-r--r--  fs/ubifs/lpt.c | 7
-rw-r--r--  fs/ubifs/lpt_commit.c | 4
-rw-r--r--  fs/ubifs/orphan.c | 2
-rw-r--r--  fs/ubifs/recovery.c | 26
-rw-r--r--  fs/ubifs/super.c | 33
-rw-r--r--  fs/ubifs/xattr.c | 4
-rw-r--r--  fs/udf/balloc.c | 9
-rw-r--r--  fs/udf/file.c | 1
-rw-r--r--  fs/udf/inode.c | 1
-rw-r--r--  fs/ufs/inode.c | 3
-rw-r--r--  fs/ufs/super.c | 6
-rw-r--r--  fs/ufs/truncate.c | 1
-rw-r--r--  fs/ufs/util.h | 2
-rw-r--r--  fs/utimes.c | 2
-rw-r--r--  fs/xattr.c | 4
-rw-r--r--  fs/xfs/Makefile | 12
-rw-r--r--  fs/xfs/linux-2.6/kmem.c | 9
-rw-r--r--  fs/xfs/linux-2.6/xfs_aops.c | 12
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c | 392
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.h | 40
-rw-r--r--  fs/xfs/linux-2.6/xfs_file.c | 8
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl.c | 4
-rw-r--r--  fs/xfs/linux-2.6/xfs_iops.c | 2
-rw-r--r--  fs/xfs/linux-2.6/xfs_linux.h | 23
-rw-r--r--  fs/xfs/linux-2.6/xfs_message.c | 126
-rw-r--r--  fs/xfs/linux-2.6/xfs_message.h | 40
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.c | 293
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.c | 265
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.h | 2
-rw-r--r--  fs/xfs/linux-2.6/xfs_sysctl.c | 2
-rw-r--r--  fs/xfs/quota/xfs_dquot.c | 50
-rw-r--r--  fs/xfs/quota/xfs_dquot_item.c | 5
-rw-r--r--  fs/xfs/quota/xfs_qm.c | 56
-rw-r--r--  fs/xfs/quota/xfs_qm.h | 5
-rw-r--r--  fs/xfs/quota/xfs_qm_bhv.c | 5
-rw-r--r--  fs/xfs/quota/xfs_qm_syscalls.c | 91
-rw-r--r--  fs/xfs/quota/xfs_trans_dquot.c | 5
-rw-r--r--  fs/xfs/support/debug.c | 107
-rw-r--r--  fs/xfs/support/debug.h | 61
-rw-r--r--  fs/xfs/xfs_alloc.c | 188
-rw-r--r--  fs/xfs/xfs_bmap.c | 24
-rw-r--r--  fs/xfs/xfs_buf_item.c | 17
-rw-r--r--  fs/xfs/xfs_da_btree.c | 9
-rw-r--r--  fs/xfs/xfs_dfrag.c | 4
-rw-r--r--  fs/xfs/xfs_dir2.c | 2
-rw-r--r--  fs/xfs/xfs_dir2_node.c | 25
-rw-r--r--  fs/xfs/xfs_error.c | 22
-rw-r--r--  fs/xfs/xfs_error.h | 19
-rw-r--r--  fs/xfs/xfs_fsops.c | 6
-rw-r--r--  fs/xfs/xfs_ialloc.c | 82
-rw-r--r--  fs/xfs/xfs_inode.c | 133
-rw-r--r--  fs/xfs/xfs_inode.h | 27
-rw-r--r--  fs/xfs/xfs_inode_item.c | 73
-rw-r--r--  fs/xfs/xfs_iomap.c | 12
-rw-r--r--  fs/xfs/xfs_itable.c | 2
-rw-r--r--  fs/xfs/xfs_log.c | 162
-rw-r--r--  fs/xfs/xfs_log_priv.h | 7
-rw-r--r--  fs/xfs/xfs_log_recover.c | 227
-rw-r--r--  fs/xfs/xfs_mount.c | 148
-rw-r--r--  fs/xfs/xfs_mount.h | 9
-rw-r--r--  fs/xfs/xfs_quota.h | 3
-rw-r--r--  fs/xfs/xfs_rtalloc.c | 92
-rw-r--r--  fs/xfs/xfs_rtalloc.h | 2
-rw-r--r--  fs/xfs/xfs_rw.c | 58
-rw-r--r--  fs/xfs/xfs_trans.h | 2
-rw-r--r--  fs/xfs/xfs_trans_ail.c | 423
-rw-r--r--  fs/xfs/xfs_trans_buf.c | 9
-rw-r--r--  fs/xfs/xfs_trans_inode.c | 24
-rw-r--r--  fs/xfs/xfs_trans_priv.h | 22
-rw-r--r--  fs/xfs/xfs_vnodeops.c | 81
-rw-r--r--  fs/xfs/xfs_vnodeops.h | 1
490 files changed, 13578 insertions, 8324 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 515455296378..535ab6eccb1a 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -262,7 +262,7 @@ static int v9fs_xattr_get_acl(struct dentry *dentry, const char *name,
 	if (strcmp(name, "") != 0)
 		return -EINVAL;
 
-	v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	v9ses = v9fs_dentry2v9ses(dentry);
 	/*
 	 * We allow set/get/list of acl when access=client is not specified
 	 */
@@ -312,7 +312,7 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name,
 	if (strcmp(name, "") != 0)
 		return -EINVAL;
 
-	v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	v9ses = v9fs_dentry2v9ses(dentry);
 	/*
 	 * set the attribute on the remote. Without even looking at the
 	 * xattr value. We leave it to the server to validate
@@ -323,7 +323,7 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name,
 
 	if (S_ISLNK(inode->i_mode))
 		return -EOPNOTSUPP;
-	if (!is_owner_or_cap(inode))
+	if (!inode_owner_or_capable(inode))
 		return -EPERM;
 	if (value) {
 		/* update the cached acl value */
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index cd63e002d826..85b67ffa2a43 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -134,7 +134,7 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry,
 	struct v9fs_session_info *v9ses;
 	struct p9_fid *fid, *old_fid = NULL;
 
-	v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	v9ses = v9fs_dentry2v9ses(dentry);
 	access = v9ses->flags & V9FS_ACCESS_MASK;
 	fid = v9fs_fid_find(dentry, uid, any);
 	if (fid)
@@ -237,7 +237,7 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
 	int any, access;
 	struct v9fs_session_info *v9ses;
 
-	v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	v9ses = v9fs_dentry2v9ses(dentry);
 	access = v9ses->flags & V9FS_ACCESS_MASK;
 	switch (access) {
 	case V9FS_ACCESS_SINGLE:
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index bd8496db135b..e5ebedfc5ed8 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -116,7 +116,6 @@ struct v9fs_session_info {
 	struct list_head slist; /* list of sessions registered with v9fs */
 	struct backing_dev_info bdi;
 	struct rw_semaphore rename_sem;
-	struct p9_fid *root_fid; /* Used for file system sync */
 };
 
 /* cache_validity flags */
@@ -130,6 +129,7 @@ struct v9fs_inode {
 #endif
 	unsigned int cache_validity;
 	struct p9_fid *writeback_fid;
+	struct mutex v_mutex;
 	struct inode vfs_inode;
 };
 
@@ -173,6 +173,11 @@ static inline struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode)
 	return (inode->i_sb->s_fs_info);
 }
 
+static inline struct v9fs_session_info *v9fs_dentry2v9ses(struct dentry *dentry)
+{
+	return dentry->d_sb->s_fs_info;
+}
+
 static inline int v9fs_proto_dotu(struct v9fs_session_info *v9ses)
 {
 	return v9ses->flags & V9FS_PROTO_2000U;
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index b6a3b9f7fe4d..e022890c6f40 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -126,7 +126,9 @@ static int v9fs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
 			retval = v9fs_refresh_inode_dotl(fid, inode);
 		else
 			retval = v9fs_refresh_inode(fid, inode);
-		if (retval <= 0)
+		if (retval == -ENOENT)
+			return 0;
+		if (retval < 0)
 			return retval;
 	}
 out_valid:
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 78bcb97c3425..ffed55817f0c 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -90,7 +90,9 @@ int v9fs_file_open(struct inode *inode, struct file *file)
 	}
 
 	file->private_data = fid;
-	if (v9ses->cache && !v9inode->writeback_fid) {
+	mutex_lock(&v9inode->v_mutex);
+	if (v9ses->cache && !v9inode->writeback_fid &&
+	    ((file->f_flags & O_ACCMODE) != O_RDONLY)) {
 		/*
 		 * clone a fid and add it to writeback_fid
 		 * we do it during open time instead of
@@ -101,10 +103,12 @@ int v9fs_file_open(struct inode *inode, struct file *file)
 		fid = v9fs_writeback_fid(file->f_path.dentry);
 		if (IS_ERR(fid)) {
 			err = PTR_ERR(fid);
+			mutex_unlock(&v9inode->v_mutex);
 			goto out_error;
 		}
 		v9inode->writeback_fid = (void *) fid;
 	}
+	mutex_unlock(&v9inode->v_mutex);
 #ifdef CONFIG_9P_FSCACHE
 	if (v9ses->cache)
 		v9fs_cache_inode_set_cookie(inode, file);
@@ -504,9 +508,12 @@ v9fs_file_write(struct file *filp, const char __user * data,
 	if (!count)
 		goto out;
 
-	return v9fs_file_write_internal(filp->f_path.dentry->d_inode,
-					filp->private_data,
-					data, count, offset, 1);
+	retval = v9fs_file_write_internal(filp->f_path.dentry->d_inode,
+					filp->private_data,
+					data, count, &origin, 1);
+	/* update offset on successful write */
+	if (retval > 0)
+		*offset = origin;
 out:
 	return retval;
 }
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 8a2c232f708a..7f6c67703195 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -221,6 +221,7 @@ struct inode *v9fs_alloc_inode(struct super_block *sb)
 #endif
 	v9inode->writeback_fid = NULL;
 	v9inode->cache_validity = 0;
+	mutex_init(&v9inode->v_mutex);
 	return &v9inode->vfs_inode;
 }
 
@@ -650,7 +651,9 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
 	/* if we are opening a file, assign the open fid to the file */
 	if (nd && nd->flags & LOOKUP_OPEN) {
 		v9inode = V9FS_I(dentry->d_inode);
-		if (v9ses->cache && !v9inode->writeback_fid) {
+		mutex_lock(&v9inode->v_mutex);
+		if (v9ses->cache && !v9inode->writeback_fid &&
+		    ((flags & O_ACCMODE) != O_RDONLY)) {
 			/*
 			 * clone a fid and add it to writeback_fid
 			 * we do it during open time instead of
@@ -661,10 +664,12 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
 			inode_fid = v9fs_writeback_fid(dentry);
 			if (IS_ERR(inode_fid)) {
 				err = PTR_ERR(inode_fid);
+				mutex_unlock(&v9inode->v_mutex);
 				goto error;
 			}
 			v9inode->writeback_fid = (void *) inode_fid;
 		}
+		mutex_unlock(&v9inode->v_mutex);
 		filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
 		if (IS_ERR(filp)) {
 			err = PTR_ERR(filp);
@@ -931,7 +936,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 
 	P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
 	err = -EPERM;
-	v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	v9ses = v9fs_dentry2v9ses(dentry);
 	if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
 		generic_fillattr(dentry->d_inode, stat);
 		return 0;
@@ -967,8 +972,12 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
 	struct p9_wstat wstat;
 
 	P9_DPRINTK(P9_DEBUG_VFS, "\n");
+	retval = inode_change_ok(dentry->d_inode, iattr);
+	if (retval)
+		return retval;
+
 	retval = -EPERM;
-	v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	v9ses = v9fs_dentry2v9ses(dentry);
 	fid = v9fs_fid_lookup(dentry);
 	if(IS_ERR(fid))
 		return PTR_ERR(fid);
@@ -993,12 +1002,7 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
 	if (iattr->ia_valid & ATTR_GID)
 		wstat.n_gid = iattr->ia_gid;
 	}
-	if ((iattr->ia_valid & ATTR_SIZE) &&
-	    iattr->ia_size != i_size_read(dentry->d_inode)) {
-		retval = vmtruncate(dentry->d_inode, iattr->ia_size);
-		if (retval)
-			return retval;
-	}
+
 	/* Write all dirty data */
 	if (S_ISREG(dentry->d_inode->i_mode))
 		filemap_write_and_wait(dentry->d_inode->i_mapping);
@@ -1006,6 +1010,11 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
 	retval = p9_client_wstat(fid, &wstat);
 	if (retval < 0)
 		return retval;
+
+	if ((iattr->ia_valid & ATTR_SIZE) &&
+	    iattr->ia_size != i_size_read(dentry->d_inode))
+		truncate_setsize(dentry->d_inode, iattr->ia_size);
+
 	v9fs_invalidate_inode_attr(dentry->d_inode);
 
 	setattr_copy(dentry->d_inode, iattr);
@@ -1130,7 +1139,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
 
 	P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name);
 	retval = -EPERM;
-	v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	v9ses = v9fs_dentry2v9ses(dentry);
 	fid = v9fs_fid_lookup(dentry);
 	if (IS_ERR(fid))
 		return PTR_ERR(fid);
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 67c138e94feb..82a7c38ddad0 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -245,7 +245,9 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
 	v9fs_set_create_acl(dentry, dacl, pacl);
 
 	v9inode = V9FS_I(inode);
-	if (v9ses->cache && !v9inode->writeback_fid) {
+	mutex_lock(&v9inode->v_mutex);
+	if (v9ses->cache && !v9inode->writeback_fid &&
+	    ((flags & O_ACCMODE) != O_RDONLY)) {
 		/*
 		 * clone a fid and add it to writeback_fid
 		 * we do it during open time instead of
@@ -256,10 +258,12 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
 		inode_fid = v9fs_writeback_fid(dentry);
 		if (IS_ERR(inode_fid)) {
 			err = PTR_ERR(inode_fid);
+			mutex_unlock(&v9inode->v_mutex);
 			goto error;
 		}
 		v9inode->writeback_fid = (void *) inode_fid;
 	}
+	mutex_unlock(&v9inode->v_mutex);
 	/* Since we are opening a file, assign the open fid to the file */
 	filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
 	if (IS_ERR(filp)) {
@@ -391,7 +395,7 @@ v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
 
 	P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
 	err = -EPERM;
-	v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	v9ses = v9fs_dentry2v9ses(dentry);
 	if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
 		generic_fillattr(dentry->d_inode, stat);
 		return 0;
@@ -448,17 +452,11 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
 	p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec;
 
 	retval = -EPERM;
-	v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	v9ses = v9fs_dentry2v9ses(dentry);
 	fid = v9fs_fid_lookup(dentry);
 	if (IS_ERR(fid))
 		return PTR_ERR(fid);
 
-	if ((iattr->ia_valid & ATTR_SIZE) &&
-	    iattr->ia_size != i_size_read(dentry->d_inode)) {
-		retval = vmtruncate(dentry->d_inode, iattr->ia_size);
-		if (retval)
-			return retval;
-	}
 	/* Write all dirty data */
 	if (S_ISREG(dentry->d_inode->i_mode))
 		filemap_write_and_wait(dentry->d_inode->i_mapping);
@@ -466,8 +464,12 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
 	retval = p9_client_setattr(fid, &p9attr);
 	if (retval < 0)
 		return retval;
-	v9fs_invalidate_inode_attr(dentry->d_inode);
 
+	if ((iattr->ia_valid & ATTR_SIZE) &&
+	    iattr->ia_size != i_size_read(dentry->d_inode))
+		truncate_setsize(dentry->d_inode, iattr->ia_size);
+
+	v9fs_invalidate_inode_attr(dentry->d_inode);
 	setattr_copy(dentry->d_inode, iattr);
 	mark_inode_dirty(dentry->d_inode);
 	if (iattr->ia_valid & ATTR_MODE) {
@@ -809,7 +811,7 @@ v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd)
 	fid = v9fs_fid_lookup(dentry);
 	if (IS_ERR(fid)) {
 		__putname(link);
-		link = ERR_PTR(PTR_ERR(fid));
+		link = ERR_CAST(fid);
 		goto ndset;
 	}
 	retval = p9_client_readlink(fid, &target);
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 09fd08d1606f..feef6cdc1fd2 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -154,6 +154,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
 		retval = PTR_ERR(inode);
 		goto release_sb;
 	}
+
 	root = d_alloc_root(inode);
 	if (!root) {
 		iput(inode);
@@ -185,21 +186,10 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
 		p9stat_free(st);
 		kfree(st);
 	}
-	v9fs_fid_add(root, fid);
 	retval = v9fs_get_acl(inode, fid);
 	if (retval)
 		goto release_sb;
-	/*
-	 * Add the root fid to session info. This is used
-	 * for file system sync. We want a cloned fid here
-	 * so that we can do a sync_filesystem after a
-	 * shrink_dcache_for_umount
-	 */
-	v9ses->root_fid = v9fs_fid_clone(root);
-	if (IS_ERR(v9ses->root_fid)) {
-		retval = PTR_ERR(v9ses->root_fid);
-		goto release_sb;
-	}
+	v9fs_fid_add(root, fid);
 
 	P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
 	return dget(sb->s_root);
@@ -210,11 +200,15 @@ close_session:
 	v9fs_session_close(v9ses);
 	kfree(v9ses);
 	return ERR_PTR(retval);
+
 release_sb:
 	/*
-	 * we will do the session_close and root dentry
-	 * release in the below call.
+	 * we will do the session_close and root dentry release
+	 * in the below call. But we need to clunk fid, because we haven't
+	 * attached the fid to dentry so it won't get clunked
+	 * automatically.
 	 */
+	p9_client_clunk(fid);
 	deactivate_locked_super(sb);
 	return ERR_PTR(retval);
 }
@@ -232,7 +226,7 @@ static void v9fs_kill_super(struct super_block *s)
 	P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s);
 
 	kill_anon_super(s);
-	p9_client_clunk(v9ses->root_fid);
+
 	v9fs_session_cancel(v9ses);
 	v9fs_session_close(v9ses);
 	kfree(v9ses);
@@ -262,7 +256,7 @@ static int v9fs_statfs(struct dentry *dentry, struct kstatfs *buf)
 		goto done;
 	}
 
-	v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	v9ses = v9fs_dentry2v9ses(dentry);
 	if (v9fs_proto_dotl(v9ses)) {
 		res = p9_client_statfs(fid, &rs);
 		if (res == 0) {
@@ -285,14 +279,6 @@ done:
 	return res;
 }
 
-static int v9fs_sync_fs(struct super_block *sb, int wait)
-{
-	struct v9fs_session_info *v9ses = sb->s_fs_info;
-
-	P9_DPRINTK(P9_DEBUG_VFS, "v9fs_sync_fs: super_block %p\n", sb);
-	return p9_client_sync_fs(v9ses->root_fid);
-}
-
 static int v9fs_drop_inode(struct inode *inode)
 {
 	struct v9fs_session_info *v9ses;
@@ -307,6 +293,51 @@ static int v9fs_drop_inode(struct inode *inode)
 	return 1;
 }
 
+static int v9fs_write_inode(struct inode *inode,
+			    struct writeback_control *wbc)
+{
+	int ret;
+	struct p9_wstat wstat;
+	struct v9fs_inode *v9inode;
+	/*
+	 * send an fsync request to server irrespective of
+	 * wbc->sync_mode.
+	 */
+	P9_DPRINTK(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
+	v9inode = V9FS_I(inode);
+	if (!v9inode->writeback_fid)
+		return 0;
+	v9fs_blank_wstat(&wstat);
+
+	ret = p9_client_wstat(v9inode->writeback_fid, &wstat);
+	if (ret < 0) {
+		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+		return ret;
+	}
+	return 0;
+}
+
+static int v9fs_write_inode_dotl(struct inode *inode,
+				 struct writeback_control *wbc)
+{
+	int ret;
+	struct v9fs_inode *v9inode;
+	/*
+	 * send an fsync request to server irrespective of
+	 * wbc->sync_mode.
+	 */
+	P9_DPRINTK(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
+	v9inode = V9FS_I(inode);
+	if (!v9inode->writeback_fid)
+		return 0;
+	ret = p9_client_fsync(v9inode->writeback_fid, 0);
+	if (ret < 0) {
+		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+		return ret;
+	}
+	return 0;
+}
+
 static const struct super_operations v9fs_super_ops = {
 	.alloc_inode = v9fs_alloc_inode,
 	.destroy_inode = v9fs_destroy_inode,
@@ -314,17 +345,18 @@ static const struct super_operations v9fs_super_ops = {
 	.evict_inode = v9fs_evict_inode,
 	.show_options = generic_show_options,
 	.umount_begin = v9fs_umount_begin,
+	.write_inode = v9fs_write_inode,
 };
 
 static const struct super_operations v9fs_super_ops_dotl = {
 	.alloc_inode = v9fs_alloc_inode,
 	.destroy_inode = v9fs_destroy_inode,
-	.sync_fs = v9fs_sync_fs,
 	.statfs = v9fs_statfs,
 	.drop_inode = v9fs_drop_inode,
 	.evict_inode = v9fs_evict_inode,
 	.show_options = generic_show_options,
 	.umount_begin = v9fs_umount_begin,
+	.write_inode = v9fs_write_inode_dotl,
 };
 
 struct file_system_type v9fs_fs_type = {
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index 2ff622f6f547..718ac1f440c6 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -50,6 +50,7 @@ struct adfs_sb_info {
 	gid_t s_gid;		/* owner gid */
 	umode_t s_owner_mask;	/* ADFS owner perm -> unix perm */
 	umode_t s_other_mask;	/* ADFS other perm -> unix perm */
+	int s_ftsuffix;		/* ,xyz hex filetype suffix option */
 
 	__u32 s_ids_per_zone;	/* max. no ids in one zone */
 	__u32 s_idlen;		/* length of ID in map */
@@ -79,6 +80,10 @@ struct adfs_dir {
 
 	int nr_buffers;
 	struct buffer_head *bh[4];
+
+	/* big directories need allocated buffers */
+	struct buffer_head **bh_fplus;
+
 	unsigned int pos;
 	unsigned int parent_id;
 
@@ -89,7 +94,7 @@ struct adfs_dir {
 /*
  * This is the overall maximum name length
  */
-#define ADFS_MAX_NAME_LEN	256
+#define ADFS_MAX_NAME_LEN	(256 + 4) /* +4 for ,xyz hex filetype suffix */
 struct object_info {
 	__u32 parent_id;		/* parent object id */
 	__u32 file_id;			/* object id */
@@ -97,10 +102,26 @@ struct object_info {
 	__u32 execaddr;			/* execution address */
 	__u32 size;			/* size */
 	__u8 attr;			/* RISC OS attributes */
-	unsigned char name_len;		/* name length */
+	unsigned int name_len;		/* name length */
 	char name[ADFS_MAX_NAME_LEN];	/* file name */
+
+	/* RISC OS file type (12-bit: derived from loadaddr) */
+	__u16 filetype;
 };
 
+/* RISC OS 12-bit filetype converts to ,xyz hex filename suffix */
+static inline int append_filetype_suffix(char *buf, __u16 filetype)
+{
+	if (filetype == 0xffff)	/* no explicit 12-bit file type was set */
+		return 0;
+
+	*buf++ = ',';
+	*buf++ = hex_asc_lo(filetype >> 8);
+	*buf++ = hex_asc_lo(filetype >> 4);
+	*buf++ = hex_asc_lo(filetype >> 0);
+	return 4;
+}
+
 struct adfs_dir_ops {
 	int	(*read)(struct super_block *sb, unsigned int id, unsigned int sz, struct adfs_dir *dir);
 	int	(*setpos)(struct adfs_dir *dir, unsigned int fpos);
diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c
index bafc71222e25..4bbe853ee50a 100644
--- a/fs/adfs/dir_f.c
+++ b/fs/adfs/dir_f.c
@@ -52,7 +52,6 @@ static inline int adfs_readname(char *buf, char *ptr, int maxlen)
 		*buf++ = *ptr;
 		ptr++;
 	}
-	*buf = '\0';
 
 	return buf - old_buf;
 }
@@ -208,7 +207,8 @@ release_buffers:
  * convert a disk-based directory entry to a Linux ADFS directory entry
  */
 static inline void
-adfs_dir2obj(struct object_info *obj, struct adfs_direntry *de)
+adfs_dir2obj(struct adfs_dir *dir, struct object_info *obj,
+	struct adfs_direntry *de)
 {
 	obj->name_len = adfs_readname(obj->name, de->dirobname, ADFS_F_NAME_LEN);
 	obj->file_id = adfs_readval(de->dirinddiscadd, 3);
@@ -216,6 +216,23 @@ adfs_dir2obj(struct object_info *obj, struct adfs_direntry *de)
 	obj->execaddr = adfs_readval(de->direxec, 4);
 	obj->size = adfs_readval(de->dirlen, 4);
 	obj->attr = de->newdiratts;
+	obj->filetype = -1;
+
+	/*
+	 * object is a file and is filetyped and timestamped?
+	 * RISC OS 12-bit filetype is stored in load_address[19:8]
+	 */
+	if ((0 == (obj->attr & ADFS_NDA_DIRECTORY)) &&
+		(0xfff00000 == (0xfff00000 & obj->loadaddr))) {
+		obj->filetype = (__u16) ((0x000fff00 & obj->loadaddr) >> 8);
+
+		/* optionally append the ,xyz hex filetype suffix */
+		if (ADFS_SB(dir->sb)->s_ftsuffix)
+			obj->name_len +=
+				append_filetype_suffix(
+					&obj->name[obj->name_len],
+					obj->filetype);
+	}
 }
 
 /*
@@ -260,7 +277,7 @@ __adfs_dir_get(struct adfs_dir *dir, int pos, struct object_info *obj)
 	if (!de.dirobname[0])
 		return -ENOENT;
 
-	adfs_dir2obj(obj, &de);
+	adfs_dir2obj(dir, obj, &de);
 
 	return 0;
 }
diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c
index 1796bb352d05..d9e3bee4e653 100644
--- a/fs/adfs/dir_fplus.c
+++ b/fs/adfs/dir_fplus.c
@@ -8,6 +8,7 @@
  * published by the Free Software Foundation.
  */
 #include <linux/buffer_head.h>
+#include <linux/slab.h>
 #include "adfs.h"
 #include "dir_fplus.h"
 
@@ -22,30 +23,53 @@ adfs_fplus_read(struct super_block *sb, unsigned int id, unsigned int sz, struct
 
 	dir->nr_buffers = 0;
 
+	/* start off using fixed bh set - only alloc for big dirs */
+	dir->bh_fplus = &dir->bh[0];
+
 	block = __adfs_block_map(sb, id, 0);
 	if (!block) {
 		adfs_error(sb, "dir object %X has a hole at offset 0", id);
 		goto out;
 	}
 
-	dir->bh[0] = sb_bread(sb, block);
-	if (!dir->bh[0])
+	dir->bh_fplus[0] = sb_bread(sb, block);
+	if (!dir->bh_fplus[0])
 		goto out;
 	dir->nr_buffers += 1;
 
-	h = (struct adfs_bigdirheader *)dir->bh[0]->b_data;
+	h = (struct adfs_bigdirheader *)dir->bh_fplus[0]->b_data;
 	size = le32_to_cpu(h->bigdirsize);
 	if (size != sz) {
-		printk(KERN_WARNING "adfs: adfs_fplus_read: directory header size\n"
-		       " does not match directory size\n");
+		printk(KERN_WARNING "adfs: adfs_fplus_read:"
+				" directory header size %X\n"
+				" does not match directory size %X\n",
+				size, sz);
 	}
 
 	if (h->bigdirversion[0] != 0 || h->bigdirversion[1] != 0 ||
 	    h->bigdirversion[2] != 0 || size & 2047 ||
-	    h->bigdirstartname != cpu_to_le32(BIGDIRSTARTNAME))
+	    h->bigdirstartname != cpu_to_le32(BIGDIRSTARTNAME)) {
+		printk(KERN_WARNING "adfs: dir object %X has"
+				" malformed dir header\n", id);
 		goto out;
+	}
 
 	size >>= sb->s_blocksize_bits;
+	if (size > sizeof(dir->bh)/sizeof(dir->bh[0])) {
+		/* this directory is too big for fixed bh set, must allocate */
+		struct buffer_head **bh_fplus =
+			kzalloc(size * sizeof(struct buffer_head *),
+				GFP_KERNEL);
+		if (!bh_fplus) {
+			adfs_error(sb, "not enough memory for"
+					" dir object %X (%d blocks)", id, size);
+			goto out;
+		}
+		dir->bh_fplus = bh_fplus;
+		/* copy over the pointer to the block that we've already read */
+		dir->bh_fplus[0] = dir->bh[0];
+	}
+
 	for (blk = 1; blk < size; blk++) {
 		block = __adfs_block_map(sb, id, blk);
 		if (!block) {
@@ -53,25 +77,44 @@ adfs_fplus_read(struct super_block *sb, unsigned int id, unsigned int sz, struct
 			goto out;
 		}
 
-		dir->bh[blk] = sb_bread(sb, block);
-		if (!dir->bh[blk])
+		dir->bh_fplus[blk] = sb_bread(sb, block);
+		if (!dir->bh_fplus[blk]) {
+			adfs_error(sb, "dir object %X failed read for"
+				" offset %d, mapped block %X",
+				id, blk, block);
 			goto out;
-		dir->nr_buffers = blk;
+		}
+
+		dir->nr_buffers += 1;
 	}
 
-	t = (struct adfs_bigdirtail *)(dir->bh[size - 1]->b_data + (sb->s_blocksize - 8));
+	t = (struct adfs_bigdirtail *)
+		(dir->bh_fplus[size - 1]->b_data + (sb->s_blocksize - 8));
 
 	if (t->bigdirendname != cpu_to_le32(BIGDIRENDNAME) ||
 	    t->bigdirendmasseq != h->startmasseq ||
-	    t->reserved[0] != 0 || t->reserved[1] != 0)
+	    t->reserved[0] != 0 || t->reserved[1] != 0) {
+		printk(KERN_WARNING "adfs: dir object %X has "
+			"malformed dir end\n", id);
 		goto out;
+	}
 
 	dir->parent_id = le32_to_cpu(h->bigdirparent);
 	dir->sb = sb;
 	return 0;
+
 out:
-	for (i = 0; i < dir->nr_buffers; i++)
-		brelse(dir->bh[i]);
+	if (dir->bh_fplus) {
+		for (i = 0; i < dir->nr_buffers; i++)
+			brelse(dir->bh_fplus[i]);
+
+		if (&dir->bh[0] != dir->bh_fplus)
+			kfree(dir->bh_fplus);
+
+		dir->bh_fplus = NULL;
+	}
+
+	dir->nr_buffers = 0;
 	dir->sb = NULL;
 	return ret;
 }
@@ -79,7 +122,8 @@ out:
 static int
 adfs_fplus_setpos(struct adfs_dir *dir, unsigned int fpos)
 {
-	struct adfs_bigdirheader *h = (struct adfs_bigdirheader *)dir->bh[0]->b_data;
+	struct adfs_bigdirheader *h =
+		(struct adfs_bigdirheader *) dir->bh_fplus[0]->b_data;
 	int ret = -ENOENT;
 
 	if (fpos <= le32_to_cpu(h->bigdirentries)) {
@@ -102,21 +146,27 @@ dir_memcpy(struct adfs_dir *dir, unsigned int offset, void *to, int len)
 	partial = sb->s_blocksize - offset;
 
 	if (partial >= len)
-		memcpy(to, dir->bh[buffer]->b_data + offset, len);
+		memcpy(to, dir->bh_fplus[buffer]->b_data + offset, len);
 	else {
 		char *c = (char *)to;
 
 		remainder = len - partial;
 
-		memcpy(c, dir->bh[buffer]->b_data + offset, partial);
-		memcpy(c + partial, dir->bh[buffer + 1]->b_data, remainder);
+		memcpy(c,
+			dir->bh_fplus[buffer]->b_data + offset,
+			partial);
+
+		memcpy(c + partial,
+			dir->bh_fplus[buffer + 1]->b_data,
+			remainder);
 	}
 }
 
 static int
 adfs_fplus_getnext(struct adfs_dir *dir, struct object_info *obj)
 {
-	struct adfs_bigdirheader *h = (struct adfs_bigdirheader *)dir->bh[0]->b_data;
+	struct adfs_bigdirheader *h =
+		(struct adfs_bigdirheader *) dir->bh_fplus[0]->b_data;
 	struct adfs_bigdirentry bde;
 	unsigned int offset;
 	int i, ret = -ENOENT;
@@ -147,6 +197,24 @@ adfs_fplus_getnext(struct adfs_dir *dir, struct object_info *obj)
 		if (obj->name[i] == '/')
 			obj->name[i] = '.';
 
+	obj->filetype = -1;
+
+	/*
+	 * object is a file and is filetyped and timestamped?
+	 * RISC OS 12-bit filetype is stored in load_address[19:8]
+	 */
+	if ((0 == (obj->attr & ADFS_NDA_DIRECTORY)) &&
+		(0xfff00000 == (0xfff00000 & obj->loadaddr))) {
+		obj->filetype = (__u16) ((0x000fff00 & obj->loadaddr) >> 8);
+
+		/* optionally append the ,xyz hex filetype suffix */
+		if (ADFS_SB(dir->sb)->s_ftsuffix)
+			obj->name_len +=
+				append_filetype_suffix(
+					&obj->name[obj->name_len],
+					obj->filetype);
+	}
+
 	dir->pos += 1;
 	ret = 0;
 out:
@@ -160,7 +228,7 @@ adfs_fplus_sync(struct adfs_dir *dir)
 	int i;
 
 	for (i = dir->nr_buffers - 1; i >= 0; i--) {
-		struct buffer_head *bh = dir->bh[i];
+		struct buffer_head *bh = dir->bh_fplus[i];
 		sync_dirty_buffer(bh);
 		if (buffer_req(bh) && !buffer_uptodate(bh))
 			err = -EIO;
@@ -174,8 +242,17 @@ adfs_fplus_free(struct adfs_dir *dir)
 {
 	int i;
 
-	for (i = 0; i < dir->nr_buffers; i++)
-		brelse(dir->bh[i]);
+	if (dir->bh_fplus) {
+		for (i = 0; i < dir->nr_buffers; i++)
+			brelse(dir->bh_fplus[i]);
+
+		if (&dir->bh[0] != dir->bh_fplus)
+			kfree(dir->bh_fplus);
+
+		dir->bh_fplus = NULL;
+	}
+
+	dir->nr_buffers = 0;
 	dir->sb = NULL;
 }
 
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 09fe40198d1c..d5250c5aae21 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -72,32 +72,18 @@ static sector_t _adfs_bmap(struct address_space *mapping, sector_t block)
 static const struct address_space_operations adfs_aops = {
 	.readpage	= adfs_readpage,
 	.writepage	= adfs_writepage,
-	.sync_page	= block_sync_page,
 	.write_begin	= adfs_write_begin,
 	.write_end	= generic_write_end,
 	.bmap		= _adfs_bmap
 };
 
-static inline unsigned int
-adfs_filetype(struct inode *inode)
-{
-	unsigned int type;
-
-	if (ADFS_I(inode)->stamped)
-		type = (ADFS_I(inode)->loadaddr >> 8) & 0xfff;
-	else
-		type = (unsigned int) -1;
-
-	return type;
-}
-
 /*
  * Convert ADFS attributes and filetype to Linux permission.
  */
 static umode_t
 adfs_atts2mode(struct super_block *sb, struct inode *inode)
 {
-	unsigned int filetype, attr = ADFS_I(inode)->attr;
+	unsigned int attr = ADFS_I(inode)->attr;
 	umode_t mode, rmask;
 	struct adfs_sb_info *asb = ADFS_SB(sb);
 
@@ -106,9 +92,7 @@ adfs_atts2mode(struct super_block *sb, struct inode *inode)
 		return S_IFDIR | S_IXUGO | mode;
 	}
 
-	filetype = adfs_filetype(inode);
-
-	switch (filetype) {
+	switch (ADFS_I(inode)->filetype) {
 	case 0xfc0:	/* LinkFS */
 		return S_IFLNK|S_IRWXUGO;
 
@@ -174,50 +158,48 @@ adfs_mode2atts(struct super_block *sb, struct inode *inode)
 
 /*
  * Convert an ADFS time to Unix time.  ADFS has a 40-bit centi-second time
- * referenced to 1 Jan 1900 (til 2248)
+ * referenced to 1 Jan 1900 (til 2248) so we need to discard 2208988800 seconds
+ * of time to convert from RISC OS epoch to Unix epoch.
  */
 static void
 adfs_adfs2unix_time(struct timespec *tv, struct inode *inode)
 {
 	unsigned int high, low;
+	/* 01 Jan 1970 00:00:00 (Unix epoch) as nanoseconds since
+	 * 01 Jan 1900 00:00:00 (RISC OS epoch)
+	 */
+	static const s64 nsec_unix_epoch_diff_risc_os_epoch =
+							2208988800000000000LL;
+	s64 nsec;
 
 	if (ADFS_I(inode)->stamped == 0)
 		goto cur_time;
 
-	high = ADFS_I(inode)->loadaddr << 24;
-	low  = ADFS_I(inode)->execaddr;
+	high = ADFS_I(inode)->loadaddr & 0xFF; /* top 8 bits of timestamp */
+	low  = ADFS_I(inode)->execaddr;    /* bottom 32 bits of timestamp */
 
-	high |= low >> 8;
-	low  &= 255;
+	/* convert 40-bit centi-seconds to 32-bit seconds
+	 * going via nanoseconds to retain precision
+	 */
+	nsec = (((s64) high << 32) | (s64) low) * 10000000; /* cs to ns */
 
 	/* Files dated pre  01 Jan 1970 00:00:00. */
-	if (high < 0x336e996a)
+	if (nsec < nsec_unix_epoch_diff_risc_os_epoch)
 		goto too_early;
 
-	/* Files dated post 18 Jan 2038 03:14:05. */
-	if (high >= 0x656e9969)
-		goto too_late;
+	/* convert from RISC OS to Unix epoch */
+	nsec -= nsec_unix_epoch_diff_risc_os_epoch;
 
-	/* discard 2208988800 (0x336e996a00) seconds of time */
-	high -= 0x336e996a;
-
-	/* convert 40-bit centi-seconds to 32-bit seconds */
-	tv->tv_sec = (((high % 100) << 8) + low) / 100 + (high / 100 << 8);
-	tv->tv_nsec = 0;
+	*tv = ns_to_timespec(nsec);
 	return;
 
  cur_time:
-	*tv = CURRENT_TIME_SEC;
+	*tv = CURRENT_TIME;
 	return;
 
  too_early:
 	tv->tv_sec = tv->tv_nsec = 0;
 	return;
-
- too_late:
-	tv->tv_sec = 0x7ffffffd;
-	tv->tv_nsec = 0;
-	return;
 }
 
 /*
@@ -279,7 +261,8 @@ adfs_iget(struct super_block *sb, struct object_info *obj)
 	ADFS_I(inode)->loadaddr = obj->loadaddr;
 	ADFS_I(inode)->execaddr = obj->execaddr;
 	ADFS_I(inode)->attr     = obj->attr;
-	ADFS_I(inode)->stamped  = ((obj->loadaddr & 0xfff00000) == 0xfff00000);
+	ADFS_I(inode)->filetype = obj->filetype;
+	ADFS_I(inode)->stamped  = ((obj->loadaddr & 0xfff00000) == 0xfff00000);
 
 	inode->i_mode	 = adfs_atts2mode(sb, inode);
 	adfs_adfs2unix_time(&inode->i_mtime, inode);
diff --git a/fs/adfs/map.c b/fs/adfs/map.c
index d1a5932bb0f1..6935f05202ac 100644
--- a/fs/adfs/map.c
+++ b/fs/adfs/map.c
@@ -51,7 +51,7 @@ static DEFINE_RWLOCK(adfs_map_lock);
 
 /*
  * This is fun.  We need to load up to 19 bits from the map at an
- * arbitary bit alignment. (We're limited to 19 bits by F+ version 2).
+ * arbitrary bit alignment. (We're limited to 19 bits by F+ version 2).
  */
 #define GET_FRAG_ID(_map,_start,_idmask)	\
 	({						\
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 06d7388b477b..c8bf36a1996a 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -138,17 +138,20 @@ static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt)
 		seq_printf(seq, ",ownmask=%o", asb->s_owner_mask);
 	if (asb->s_other_mask != ADFS_DEFAULT_OTHER_MASK)
 		seq_printf(seq, ",othmask=%o", asb->s_other_mask);
+	if (asb->s_ftsuffix != 0)
+		seq_printf(seq, ",ftsuffix=%u", asb->s_ftsuffix);
 
 	return 0;
 }
 
-enum {Opt_uid, Opt_gid, Opt_ownmask, Opt_othmask, Opt_err};
+enum {Opt_uid, Opt_gid, Opt_ownmask, Opt_othmask, Opt_ftsuffix, Opt_err};
 
 static const match_table_t tokens = {
 	{Opt_uid, "uid=%u"},
 	{Opt_gid, "gid=%u"},
 	{Opt_ownmask, "ownmask=%o"},
 	{Opt_othmask, "othmask=%o"},
+	{Opt_ftsuffix, "ftsuffix=%u"},
 	{Opt_err, NULL}
 };
 
@@ -189,6 +192,11 @@ static int parse_options(struct super_block *sb, char *options)
189 return -EINVAL; 192 return -EINVAL;
190 asb->s_other_mask = option; 193 asb->s_other_mask = option;
191 break; 194 break;
195 case Opt_ftsuffix:
196 if (match_int(args, &option))
197 return -EINVAL;
198 asb->s_ftsuffix = option;
199 break;
192 default: 200 default:
193 printk("ADFS-fs: unrecognised mount option \"%s\" " 201 printk("ADFS-fs: unrecognised mount option \"%s\" "
194 "or missing value\n", p); 202 "or missing value\n", p);
@@ -366,6 +374,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
366 asb->s_gid = 0; 374 asb->s_gid = 0;
367 asb->s_owner_mask = ADFS_DEFAULT_OWNER_MASK; 375 asb->s_owner_mask = ADFS_DEFAULT_OWNER_MASK;
368 asb->s_other_mask = ADFS_DEFAULT_OTHER_MASK; 376 asb->s_other_mask = ADFS_DEFAULT_OTHER_MASK;
377 asb->s_ftsuffix = 0;
369 378
370 if (parse_options(sb, data)) 379 if (parse_options(sb, data))
371 goto error; 380 goto error;
@@ -445,11 +454,13 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
445 454
446 root_obj.parent_id = root_obj.file_id = le32_to_cpu(dr->root); 455 root_obj.parent_id = root_obj.file_id = le32_to_cpu(dr->root);
447 root_obj.name_len = 0; 456 root_obj.name_len = 0;
448 root_obj.loadaddr = 0; 457 /* Set root object date as 01 Jan 1987 00:00:00 */
449 root_obj.execaddr = 0; 458 root_obj.loadaddr = 0xfff0003f;
459 root_obj.execaddr = 0xec22c000;
450 root_obj.size = ADFS_NEWDIR_SIZE; 460 root_obj.size = ADFS_NEWDIR_SIZE;
451 root_obj.attr = ADFS_NDA_DIRECTORY | ADFS_NDA_OWNER_READ | 461 root_obj.attr = ADFS_NDA_DIRECTORY | ADFS_NDA_OWNER_READ |
452 ADFS_NDA_OWNER_WRITE | ADFS_NDA_PUBLIC_READ; 462 ADFS_NDA_OWNER_WRITE | ADFS_NDA_PUBLIC_READ;
463 root_obj.filetype = -1;
453 464
454 /* 465 /*
455 * If this is a F+ disk with variable length directories, 466 * If this is a F+ disk with variable length directories,
@@ -463,6 +474,12 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
463 asb->s_dir = &adfs_f_dir_ops; 474 asb->s_dir = &adfs_f_dir_ops;
464 asb->s_namelen = ADFS_F_NAME_LEN; 475 asb->s_namelen = ADFS_F_NAME_LEN;
465 } 476 }
477 /*
478 * ,xyz hex filetype suffix may be added by driver
479 * to files that have valid RISC OS filetype
480 */
481 if (asb->s_ftsuffix)
482 asb->s_namelen += 4;
466 483
467 sb->s_d_op = &adfs_dentry_operations; 484 sb->s_d_op = &adfs_dentry_operations;
468 root = adfs_iget(sb, &root_obj); 485 root = adfs_iget(sb, &root_obj);
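
The new ftsuffix option reserves four extra bytes of name space so the driver
can expose the 12-bit RISC OS filetype as a ",xyz" hex suffix. A hedged
user-space sketch of how such a suffix is formed (append_ftsuffix() is an
illustrative helper, not the driver's code); a filetype of -1, as set for the
root object above, means no valid filetype:

    #include <stdio.h>

    static int append_ftsuffix(char *buf, size_t len,
                               const char *name, int filetype)
    {
        if (filetype < 0)       /* no RISC OS filetype: plain name */
            return snprintf(buf, len, "%s", name);
        return snprintf(buf, len, "%s,%03x", name, filetype & 0xfff);
    }

    int main(void)
    {
        char buf[32];
        append_ftsuffix(buf, sizeof(buf), "ReadMe", 0xfff); /* text file */
        puts(buf);              /* prints "ReadMe,fff" */
        return 0;
    }
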
diff --git a/fs/affs/Makefile b/fs/affs/Makefile
index b2c4f54446f3..3988b4a78339 100644
--- a/fs/affs/Makefile
+++ b/fs/affs/Makefile
@@ -2,7 +2,7 @@
2# Makefile for the Linux affs filesystem routines. 2# Makefile for the Linux affs filesystem routines.
3# 3#
4 4
5#EXTRA_CFLAGS=-DDEBUG=1 5#ccflags-y := -DDEBUG=1
6 6
7obj-$(CONFIG_AFFS_FS) += affs.o 7obj-$(CONFIG_AFFS_FS) += affs.o
8 8
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 0a90dcd46de2..acf321b70fcd 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -429,7 +429,6 @@ static sector_t _affs_bmap(struct address_space *mapping, sector_t block)
429const struct address_space_operations affs_aops = { 429const struct address_space_operations affs_aops = {
430 .readpage = affs_readpage, 430 .readpage = affs_readpage,
431 .writepage = affs_writepage, 431 .writepage = affs_writepage,
432 .sync_page = block_sync_page,
433 .write_begin = affs_write_begin, 432 .write_begin = affs_write_begin,
434 .write_end = generic_write_end, 433 .write_end = generic_write_end,
435 .bmap = _affs_bmap 434 .bmap = _affs_bmap
@@ -786,7 +785,6 @@ out:
786const struct address_space_operations affs_aops_ofs = { 785const struct address_space_operations affs_aops_ofs = {
787 .readpage = affs_readpage_ofs, 786 .readpage = affs_readpage_ofs,
788 //.writepage = affs_writepage_ofs, 787 //.writepage = affs_writepage_ofs,
789 //.sync_page = affs_sync_page_ofs,
790 .write_begin = affs_write_begin_ofs, 788 .write_begin = affs_write_begin_ofs,
791 .write_end = affs_write_end_ofs 789 .write_end = affs_write_end_ofs
792}; 790};
diff --git a/fs/afs/cache.c b/fs/afs/cache.c
index 0fb315dd4d2a..577763c3d88b 100644
--- a/fs/afs/cache.c
+++ b/fs/afs/cache.c
@@ -98,7 +98,7 @@ static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data,
98} 98}
99 99
100/* 100/*
101 * provide new auxilliary cache data 101 * provide new auxiliary cache data
102 */ 102 */
103static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data, 103static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data,
104 void *buffer, uint16_t bufmax) 104 void *buffer, uint16_t bufmax)
@@ -117,7 +117,7 @@ static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data,
117} 117}
118 118
119/* 119/*
120 * check that the auxilliary data indicates that the entry is still valid 120 * check that the auxiliary data indicates that the entry is still valid
121 */ 121 */
122static enum fscache_checkaux afs_cell_cache_check_aux(void *cookie_netfs_data, 122static enum fscache_checkaux afs_cell_cache_check_aux(void *cookie_netfs_data,
123 const void *buffer, 123 const void *buffer,
@@ -150,7 +150,7 @@ static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data,
150} 150}
151 151
152/* 152/*
153 * provide new auxilliary cache data 153 * provide new auxiliary cache data
154 */ 154 */
155static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data, 155static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data,
156 void *buffer, uint16_t bufmax) 156 void *buffer, uint16_t bufmax)
@@ -172,7 +172,7 @@ static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data,
172} 172}
173 173
174/* 174/*
175 * check that the auxilliary data indicates that the entry is still valid 175 * check that the auxiliary data indicates that the entry is still valid
176 */ 176 */
177static 177static
178enum fscache_checkaux afs_vlocation_cache_check_aux(void *cookie_netfs_data, 178enum fscache_checkaux afs_vlocation_cache_check_aux(void *cookie_netfs_data,
@@ -283,7 +283,7 @@ static void afs_vnode_cache_get_attr(const void *cookie_netfs_data,
283} 283}
284 284
285/* 285/*
286 * provide new auxilliary cache data 286 * provide new auxiliary cache data
287 */ 287 */
288static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data, 288static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data,
289 void *buffer, uint16_t bufmax) 289 void *buffer, uint16_t bufmax)
@@ -309,7 +309,7 @@ static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data,
309} 309}
310 310
311/* 311/*
312 * check that the auxilliary data indicates that the entry is still valid 312 * check that the auxiliary data indicates that the entry is still valid
313 */ 313 */
314static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data, 314static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data,
315 const void *buffer, 315 const void *buffer,
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 0d5eeadf6121..3c090b7555ea 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -293,7 +293,7 @@ struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz,
293 if (!cell) { 293 if (!cell) {
294 /* this should not happen unless user tries to mount 294 /* this should not happen unless user tries to mount
295 * when root cell is not set. Return an impossibly 295 * when root cell is not set. Return an impossibly
296 * bizzare errno to alert the user. Things like 296 * bizarre errno to alert the user. Things like
297 * ENOENT might be "more appropriate" but they happen 297 * ENOENT might be "more appropriate" but they happen
298 * for other reasons. 298 * for other reasons.
299 */ 299 */
diff --git a/fs/aio.c b/fs/aio.c
index 7f54f43b8f7c..e29ec485af25 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -34,8 +34,6 @@
34#include <linux/security.h> 34#include <linux/security.h>
35#include <linux/eventfd.h> 35#include <linux/eventfd.h>
36#include <linux/blkdev.h> 36#include <linux/blkdev.h>
37#include <linux/mempool.h>
38#include <linux/hash.h>
39#include <linux/compat.h> 37#include <linux/compat.h>
40 38
41#include <asm/kmap_types.h> 39#include <asm/kmap_types.h>
@@ -65,14 +63,6 @@ static DECLARE_WORK(fput_work, aio_fput_routine);
65static DEFINE_SPINLOCK(fput_lock); 63static DEFINE_SPINLOCK(fput_lock);
66static LIST_HEAD(fput_head); 64static LIST_HEAD(fput_head);
67 65
68#define AIO_BATCH_HASH_BITS 3 /* allocated on-stack, so don't go crazy */
69#define AIO_BATCH_HASH_SIZE (1 << AIO_BATCH_HASH_BITS)
70struct aio_batch_entry {
71 struct hlist_node list;
72 struct address_space *mapping;
73};
74mempool_t *abe_pool;
75
76static void aio_kick_handler(struct work_struct *); 66static void aio_kick_handler(struct work_struct *);
77static void aio_queue_work(struct kioctx *); 67static void aio_queue_work(struct kioctx *);
78 68
@@ -86,8 +76,7 @@ static int __init aio_setup(void)
86 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); 76 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
87 77
88 aio_wq = alloc_workqueue("aio", 0, 1); /* used to limit concurrency */ 78 aio_wq = alloc_workqueue("aio", 0, 1); /* used to limit concurrency */
89 abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry)); 79 BUG_ON(!aio_wq);
90 BUG_ON(!aio_wq || !abe_pool);
91 80
92 pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page)); 81 pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page));
93 82
@@ -520,7 +509,7 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
520 ctx->reqs_active--; 509 ctx->reqs_active--;
521 510
522 if (unlikely(!ctx->reqs_active && ctx->dead)) 511 if (unlikely(!ctx->reqs_active && ctx->dead))
523 wake_up(&ctx->wait); 512 wake_up_all(&ctx->wait);
524} 513}
525 514
526static void aio_fput_routine(struct work_struct *data) 515static void aio_fput_routine(struct work_struct *data)
@@ -1229,7 +1218,7 @@ static void io_destroy(struct kioctx *ioctx)
1229 * by other CPUs at this point. Right now, we rely on the 1218 * by other CPUs at this point. Right now, we rely on the
1230 * locking done by the above calls to ensure this consistency. 1219 * locking done by the above calls to ensure this consistency.
1231 */ 1220 */
1232 wake_up(&ioctx->wait); 1221 wake_up_all(&ioctx->wait);
1233 put_ioctx(ioctx); /* once for the lookup */ 1222 put_ioctx(ioctx); /* once for the lookup */
1234} 1223}
1235 1224
@@ -1525,57 +1514,8 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
1525 return 0; 1514 return 0;
1526} 1515}
1527 1516
1528static void aio_batch_add(struct address_space *mapping,
1529 struct hlist_head *batch_hash)
1530{
1531 struct aio_batch_entry *abe;
1532 struct hlist_node *pos;
1533 unsigned bucket;
1534
1535 bucket = hash_ptr(mapping, AIO_BATCH_HASH_BITS);
1536 hlist_for_each_entry(abe, pos, &batch_hash[bucket], list) {
1537 if (abe->mapping == mapping)
1538 return;
1539 }
1540
1541 abe = mempool_alloc(abe_pool, GFP_KERNEL);
1542
1543 /*
1544 * we should be using igrab here, but
1545 * we don't want to hammer on the global
1546 * inode spinlock just to take an extra
1547 * reference on a file that we must already
1548 * have a reference to.
1549 *
1550 * When we're called, we always have a reference
1551 * on the file, so we must always have a reference
1552 * on the inode, so ihold() is safe here.
1553 */
1554 ihold(mapping->host);
1555 abe->mapping = mapping;
1556 hlist_add_head(&abe->list, &batch_hash[bucket]);
1557 return;
1558}
1559
1560static void aio_batch_free(struct hlist_head *batch_hash)
1561{
1562 struct aio_batch_entry *abe;
1563 struct hlist_node *pos, *n;
1564 int i;
1565
1566 for (i = 0; i < AIO_BATCH_HASH_SIZE; i++) {
1567 hlist_for_each_entry_safe(abe, pos, n, &batch_hash[i], list) {
1568 blk_run_address_space(abe->mapping);
1569 iput(abe->mapping->host);
1570 hlist_del(&abe->list);
1571 mempool_free(abe, abe_pool);
1572 }
1573 }
1574}
1575
1576static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, 1517static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1577 struct iocb *iocb, struct hlist_head *batch_hash, 1518 struct iocb *iocb, bool compat)
1578 bool compat)
1579{ 1519{
1580 struct kiocb *req; 1520 struct kiocb *req;
1581 struct file *file; 1521 struct file *file;
@@ -1666,11 +1606,6 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1666 ; 1606 ;
1667 } 1607 }
1668 spin_unlock_irq(&ctx->ctx_lock); 1608 spin_unlock_irq(&ctx->ctx_lock);
1669 if (req->ki_opcode == IOCB_CMD_PREAD ||
1670 req->ki_opcode == IOCB_CMD_PREADV ||
1671 req->ki_opcode == IOCB_CMD_PWRITE ||
1672 req->ki_opcode == IOCB_CMD_PWRITEV)
1673 aio_batch_add(file->f_mapping, batch_hash);
1674 1609
1675 aio_put_req(req); /* drop extra ref to req */ 1610 aio_put_req(req); /* drop extra ref to req */
1676 return 0; 1611 return 0;
@@ -1687,7 +1622,7 @@ long do_io_submit(aio_context_t ctx_id, long nr,
1687 struct kioctx *ctx; 1622 struct kioctx *ctx;
1688 long ret = 0; 1623 long ret = 0;
1689 int i; 1624 int i;
1690 struct hlist_head batch_hash[AIO_BATCH_HASH_SIZE] = { { 0, }, }; 1625 struct blk_plug plug;
1691 1626
1692 if (unlikely(nr < 0)) 1627 if (unlikely(nr < 0))
1693 return -EINVAL; 1628 return -EINVAL;
@@ -1704,6 +1639,8 @@ long do_io_submit(aio_context_t ctx_id, long nr,
1704 return -EINVAL; 1639 return -EINVAL;
1705 } 1640 }
1706 1641
1642 blk_start_plug(&plug);
1643
1707 /* 1644 /*
1708 * AKPM: should this return a partial result if some of the IOs were 1645 * AKPM: should this return a partial result if some of the IOs were
1709 * successfully submitted? 1646 * successfully submitted?
@@ -1722,11 +1659,11 @@ long do_io_submit(aio_context_t ctx_id, long nr,
1722 break; 1659 break;
1723 } 1660 }
1724 1661
1725 ret = io_submit_one(ctx, user_iocb, &tmp, batch_hash, compat); 1662 ret = io_submit_one(ctx, user_iocb, &tmp, compat);
1726 if (ret) 1663 if (ret)
1727 break; 1664 break;
1728 } 1665 }
1729 aio_batch_free(batch_hash); 1666 blk_finish_plug(&plug);
1730 1667
1731 put_ioctx(ctx); 1668 put_ioctx(ctx);
1732 return i ? i : ret; 1669 return i ? i : ret;
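
The per-call batch hash, which collected address spaces so that
blk_run_address_space() could be kicked once per mapping after submission, is
replaced by block-layer plugging: every request submitted between
blk_start_plug() and blk_finish_plug() is held back and dispatched as one
batch. A minimal sketch of the pattern (submit_one() stands in for
io_submit_one(); this is a kernel-internal fragment, not standalone code):

    struct blk_plug plug;

    blk_start_plug(&plug);              /* queue I/O on this task */
    for (i = 0; i < nr; i++) {
        ret = submit_one(ctx, &iocb[i]);
        if (ret)
            break;
    }
    blk_finish_plug(&plug);             /* unplug: dispatch the batch */

Because the plug lives entirely on the submitting task's stack, the mempool
and the ihold()/iput() bookkeeping the old batching needed can go away too.
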
diff --git a/fs/attr.c b/fs/attr.c
index 7ca41811afa1..91dbe2a107f2 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -59,7 +59,7 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
59 59
60 /* Make sure a caller can chmod. */ 60 /* Make sure a caller can chmod. */
61 if (ia_valid & ATTR_MODE) { 61 if (ia_valid & ATTR_MODE) {
62 if (!is_owner_or_cap(inode)) 62 if (!inode_owner_or_capable(inode))
63 return -EPERM; 63 return -EPERM;
64 /* Also check the setgid bit! */ 64 /* Also check the setgid bit! */
65 if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid : 65 if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
@@ -69,7 +69,7 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
69 69
70 /* Check for setting the inode time. */ 70 /* Check for setting the inode time. */
71 if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) { 71 if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) {
72 if (!is_owner_or_cap(inode)) 72 if (!inode_owner_or_capable(inode))
73 return -EPERM; 73 return -EPERM;
74 } 74 }
75 75
@@ -128,7 +128,7 @@ EXPORT_SYMBOL(inode_newsize_ok);
128 * setattr_copy must be called with i_mutex held. 128 * setattr_copy must be called with i_mutex held.
129 * 129 *
130 * setattr_copy updates the inode's metadata with that specified 130 * setattr_copy updates the inode's metadata with that specified
131 * in attr. Noticably missing is inode size update, which is more complex 131 * in attr. Noticeably missing is inode size update, which is more complex
132 * as it requires pagecache updates. 132 * as it requires pagecache updates.
133 * 133 *
134 * The inode is not marked as dirty after this operation. The rationale is 134 * The inode is not marked as dirty after this operation. The rationale is
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 54f923792728..475f9c597cb7 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -61,8 +61,6 @@ do { \
61 current->pid, __func__, ##args); \ 61 current->pid, __func__, ##args); \
62} while (0) 62} while (0)
63 63
64extern spinlock_t autofs4_lock;
65
66/* Unified info structure. This is pointed to by both the dentry and 64/* Unified info structure. This is pointed to by both the dentry and
67 inode structures. Each file in the filesystem has an instance of this 65 inode structures. Each file in the filesystem has an instance of this
68 structure. It holds a reference to the dentry, so dentries are never 66 structure. It holds a reference to the dentry, so dentries are never
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 1442da4860e5..509fe1eb66ae 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -372,6 +372,10 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
372 return -EBUSY; 372 return -EBUSY;
373 } else { 373 } else {
374 struct file *pipe = fget(pipefd); 374 struct file *pipe = fget(pipefd);
375 if (!pipe) {
376 err = -EBADF;
377 goto out;
378 }
375 if (!pipe->f_op || !pipe->f_op->write) { 379 if (!pipe->f_op || !pipe->f_op->write) {
376 err = -EPIPE; 380 err = -EPIPE;
377 fput(pipe); 381 fput(pipe);
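
fget() returns NULL for an invalid descriptor, so the descriptor must be
validated before f_op is dereferenced. The shape of the added guard, as a
fragment simplified to direct returns (the driver itself routes the -EBADF
case through its common exit label):

    struct file *pipe = fget(pipefd);
    if (!pipe)
        return -EBADF;                  /* bad fd: fail before any deref */
    if (!pipe->f_op || !pipe->f_op->write) {
        fput(pipe);                     /* drop the reference fget() took */
        return -EPIPE;                  /* not something we can write to */
    }
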
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index f43100b9662b..450f529a4eae 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -87,18 +87,70 @@ done:
87} 87}
88 88
89/* 89/*
90 * Calculate and dget next entry in the subdirs list under root.
91 */
92static struct dentry *get_next_positive_subdir(struct dentry *prev,
93 struct dentry *root)
94{
95 struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb);
96 struct list_head *next;
97 struct dentry *p, *q;
98
99 spin_lock(&sbi->lookup_lock);
100
101 if (prev == NULL) {
102 spin_lock(&root->d_lock);
103 prev = dget_dlock(root);
104 next = prev->d_subdirs.next;
105 p = prev;
106 goto start;
107 }
108
109 p = prev;
110 spin_lock(&p->d_lock);
111again:
112 next = p->d_u.d_child.next;
113start:
114 if (next == &root->d_subdirs) {
115 spin_unlock(&p->d_lock);
116 spin_unlock(&sbi->lookup_lock);
117 dput(prev);
118 return NULL;
119 }
120
121 q = list_entry(next, struct dentry, d_u.d_child);
122
123 spin_lock_nested(&q->d_lock, DENTRY_D_LOCK_NESTED);
124 /* Negative dentry - try next */
125 if (!simple_positive(q)) {
126 spin_unlock(&p->d_lock);
127 p = q;
128 goto again;
129 }
130 dget_dlock(q);
131 spin_unlock(&q->d_lock);
132 spin_unlock(&p->d_lock);
133 spin_unlock(&sbi->lookup_lock);
134
135 dput(prev);
136
137 return q;
138}
139
140/*
90 * Calculate and dget next entry in top down tree traversal. 141 * Calculate and dget next entry in top down tree traversal.
91 */ 142 */
92static struct dentry *get_next_positive_dentry(struct dentry *prev, 143static struct dentry *get_next_positive_dentry(struct dentry *prev,
93 struct dentry *root) 144 struct dentry *root)
94{ 145{
146 struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb);
95 struct list_head *next; 147 struct list_head *next;
96 struct dentry *p, *ret; 148 struct dentry *p, *ret;
97 149
98 if (prev == NULL) 150 if (prev == NULL)
99 return dget(root); 151 return dget(root);
100 152
101 spin_lock(&autofs4_lock); 153 spin_lock(&sbi->lookup_lock);
102relock: 154relock:
103 p = prev; 155 p = prev;
104 spin_lock(&p->d_lock); 156 spin_lock(&p->d_lock);
@@ -110,7 +162,7 @@ again:
110 162
111 if (p == root) { 163 if (p == root) {
112 spin_unlock(&p->d_lock); 164 spin_unlock(&p->d_lock);
113 spin_unlock(&autofs4_lock); 165 spin_unlock(&sbi->lookup_lock);
114 dput(prev); 166 dput(prev);
115 return NULL; 167 return NULL;
116 } 168 }
@@ -140,7 +192,7 @@ again:
140 dget_dlock(ret); 192 dget_dlock(ret);
141 spin_unlock(&ret->d_lock); 193 spin_unlock(&ret->d_lock);
142 spin_unlock(&p->d_lock); 194 spin_unlock(&p->d_lock);
143 spin_unlock(&autofs4_lock); 195 spin_unlock(&sbi->lookup_lock);
144 196
145 dput(prev); 197 dput(prev);
146 198
@@ -290,11 +342,8 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
290 spin_lock(&sbi->fs_lock); 342 spin_lock(&sbi->fs_lock);
291 ino = autofs4_dentry_ino(root); 343 ino = autofs4_dentry_ino(root);
292 /* No point expiring a pending mount */ 344 /* No point expiring a pending mount */
293 if (ino->flags & AUTOFS_INF_PENDING) { 345 if (ino->flags & AUTOFS_INF_PENDING)
294 spin_unlock(&sbi->fs_lock); 346 goto out;
295 return NULL;
296 }
297 managed_dentry_set_transit(root);
298 if (!autofs4_direct_busy(mnt, root, timeout, do_now)) { 347 if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
299 struct autofs_info *ino = autofs4_dentry_ino(root); 348 struct autofs_info *ino = autofs4_dentry_ino(root);
300 ino->flags |= AUTOFS_INF_EXPIRING; 349 ino->flags |= AUTOFS_INF_EXPIRING;
@@ -302,7 +351,7 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
302 spin_unlock(&sbi->fs_lock); 351 spin_unlock(&sbi->fs_lock);
303 return root; 352 return root;
304 } 353 }
305 managed_dentry_clear_transit(root); 354out:
306 spin_unlock(&sbi->fs_lock); 355 spin_unlock(&sbi->fs_lock);
307 dput(root); 356 dput(root);
308 357
@@ -336,13 +385,12 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
336 timeout = sbi->exp_timeout; 385 timeout = sbi->exp_timeout;
337 386
338 dentry = NULL; 387 dentry = NULL;
339 while ((dentry = get_next_positive_dentry(dentry, root))) { 388 while ((dentry = get_next_positive_subdir(dentry, root))) {
340 spin_lock(&sbi->fs_lock); 389 spin_lock(&sbi->fs_lock);
341 ino = autofs4_dentry_ino(dentry); 390 ino = autofs4_dentry_ino(dentry);
342 /* No point expiring a pending mount */ 391 /* No point expiring a pending mount */
343 if (ino->flags & AUTOFS_INF_PENDING) 392 if (ino->flags & AUTOFS_INF_PENDING)
344 goto cont; 393 goto next;
345 managed_dentry_set_transit(dentry);
346 394
347 /* 395 /*
348 * Case 1: (i) indirect mount or top level pseudo direct mount 396 * Case 1: (i) indirect mount or top level pseudo direct mount
@@ -402,8 +450,6 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
402 } 450 }
403 } 451 }
404next: 452next:
405 managed_dentry_clear_transit(dentry);
406cont:
407 spin_unlock(&sbi->fs_lock); 453 spin_unlock(&sbi->fs_lock);
408 } 454 }
409 return NULL; 455 return NULL;
@@ -415,13 +461,13 @@ found:
415 ino->flags |= AUTOFS_INF_EXPIRING; 461 ino->flags |= AUTOFS_INF_EXPIRING;
416 init_completion(&ino->expire_complete); 462 init_completion(&ino->expire_complete);
417 spin_unlock(&sbi->fs_lock); 463 spin_unlock(&sbi->fs_lock);
418 spin_lock(&autofs4_lock); 464 spin_lock(&sbi->lookup_lock);
419 spin_lock(&expired->d_parent->d_lock); 465 spin_lock(&expired->d_parent->d_lock);
420 spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED); 466 spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED);
421 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); 467 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
422 spin_unlock(&expired->d_lock); 468 spin_unlock(&expired->d_lock);
423 spin_unlock(&expired->d_parent->d_lock); 469 spin_unlock(&expired->d_parent->d_lock);
424 spin_unlock(&autofs4_lock); 470 spin_unlock(&sbi->lookup_lock);
425 return expired; 471 return expired;
426} 472}
427 473
@@ -484,8 +530,6 @@ int autofs4_expire_run(struct super_block *sb,
484 spin_lock(&sbi->fs_lock); 530 spin_lock(&sbi->fs_lock);
485 ino = autofs4_dentry_ino(dentry); 531 ino = autofs4_dentry_ino(dentry);
486 ino->flags &= ~AUTOFS_INF_EXPIRING; 532 ino->flags &= ~AUTOFS_INF_EXPIRING;
487 if (!d_unhashed(dentry))
488 managed_dentry_clear_transit(dentry);
489 complete_all(&ino->expire_complete); 533 complete_all(&ino->expire_complete);
490 spin_unlock(&sbi->fs_lock); 534 spin_unlock(&sbi->fs_lock);
491 535
@@ -513,9 +557,7 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
513 spin_lock(&sbi->fs_lock); 557 spin_lock(&sbi->fs_lock);
514 ino->flags &= ~AUTOFS_INF_EXPIRING; 558 ino->flags &= ~AUTOFS_INF_EXPIRING;
515 spin_lock(&dentry->d_lock); 559 spin_lock(&dentry->d_lock);
516 if (ret) 560 if (!ret) {
517 __managed_dentry_clear_transit(dentry);
518 else {
519 if ((IS_ROOT(dentry) || 561 if ((IS_ROOT(dentry) ||
520 (autofs_type_indirect(sbi->type) && 562 (autofs_type_indirect(sbi->type) &&
521 IS_ROOT(dentry->d_parent))) && 563 IS_ROOT(dentry->d_parent))) &&
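
get_next_positive_subdir() scans only the immediate children of the mount
root, which is all the indirect-expire path needs, instead of running the
full top-down traversal. The positivity test it relies on is the usual
libfs-style helper, shown here as defined in the autofs4 headers of this era:

    /* A dentry is interesting only if it has an inode and is hashed. */
    static inline int simple_positive(struct dentry *dentry)
    {
        return dentry->d_inode && !d_unhashed(dentry);
    }

Holding sbi->lookup_lock across each list step keeps the d_subdirs cursor
stable without the old global autofs4_lock.
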
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index e6f84d26f4cf..f55ae23b137e 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -23,8 +23,6 @@
23 23
24#include "autofs_i.h" 24#include "autofs_i.h"
25 25
26DEFINE_SPINLOCK(autofs4_lock);
27
28static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *); 26static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *);
29static int autofs4_dir_unlink(struct inode *,struct dentry *); 27static int autofs4_dir_unlink(struct inode *,struct dentry *);
30static int autofs4_dir_rmdir(struct inode *,struct dentry *); 28static int autofs4_dir_rmdir(struct inode *,struct dentry *);
@@ -125,15 +123,15 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
125 * autofs file system so just let the libfs routines handle 123 * autofs file system so just let the libfs routines handle
126 * it. 124 * it.
127 */ 125 */
128 spin_lock(&autofs4_lock); 126 spin_lock(&sbi->lookup_lock);
129 spin_lock(&dentry->d_lock); 127 spin_lock(&dentry->d_lock);
130 if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { 128 if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
131 spin_unlock(&dentry->d_lock); 129 spin_unlock(&dentry->d_lock);
132 spin_unlock(&autofs4_lock); 130 spin_unlock(&sbi->lookup_lock);
133 return -ENOENT; 131 return -ENOENT;
134 } 132 }
135 spin_unlock(&dentry->d_lock); 133 spin_unlock(&dentry->d_lock);
136 spin_unlock(&autofs4_lock); 134 spin_unlock(&sbi->lookup_lock);
137 135
138out: 136out:
139 return dcache_dir_open(inode, file); 137 return dcache_dir_open(inode, file);
@@ -171,7 +169,6 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
171 const unsigned char *str = name->name; 169 const unsigned char *str = name->name;
172 struct list_head *p, *head; 170 struct list_head *p, *head;
173 171
174 spin_lock(&autofs4_lock);
175 spin_lock(&sbi->lookup_lock); 172 spin_lock(&sbi->lookup_lock);
176 head = &sbi->active_list; 173 head = &sbi->active_list;
177 list_for_each(p, head) { 174 list_for_each(p, head) {
@@ -204,14 +201,12 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
204 dget_dlock(active); 201 dget_dlock(active);
205 spin_unlock(&active->d_lock); 202 spin_unlock(&active->d_lock);
206 spin_unlock(&sbi->lookup_lock); 203 spin_unlock(&sbi->lookup_lock);
207 spin_unlock(&autofs4_lock);
208 return active; 204 return active;
209 } 205 }
210next: 206next:
211 spin_unlock(&active->d_lock); 207 spin_unlock(&active->d_lock);
212 } 208 }
213 spin_unlock(&sbi->lookup_lock); 209 spin_unlock(&sbi->lookup_lock);
214 spin_unlock(&autofs4_lock);
215 210
216 return NULL; 211 return NULL;
217} 212}
@@ -226,7 +221,6 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
226 const unsigned char *str = name->name; 221 const unsigned char *str = name->name;
227 struct list_head *p, *head; 222 struct list_head *p, *head;
228 223
229 spin_lock(&autofs4_lock);
230 spin_lock(&sbi->lookup_lock); 224 spin_lock(&sbi->lookup_lock);
231 head = &sbi->expiring_list; 225 head = &sbi->expiring_list;
232 list_for_each(p, head) { 226 list_for_each(p, head) {
@@ -259,14 +253,12 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
259 dget_dlock(expiring); 253 dget_dlock(expiring);
260 spin_unlock(&expiring->d_lock); 254 spin_unlock(&expiring->d_lock);
261 spin_unlock(&sbi->lookup_lock); 255 spin_unlock(&sbi->lookup_lock);
262 spin_unlock(&autofs4_lock);
263 return expiring; 256 return expiring;
264 } 257 }
265next: 258next:
266 spin_unlock(&expiring->d_lock); 259 spin_unlock(&expiring->d_lock);
267 } 260 }
268 spin_unlock(&sbi->lookup_lock); 261 spin_unlock(&sbi->lookup_lock);
269 spin_unlock(&autofs4_lock);
270 262
271 return NULL; 263 return NULL;
272} 264}
@@ -275,17 +267,16 @@ static int autofs4_mount_wait(struct dentry *dentry)
275{ 267{
276 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 268 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
277 struct autofs_info *ino = autofs4_dentry_ino(dentry); 269 struct autofs_info *ino = autofs4_dentry_ino(dentry);
278 int status; 270 int status = 0;
279 271
280 if (ino->flags & AUTOFS_INF_PENDING) { 272 if (ino->flags & AUTOFS_INF_PENDING) {
281 DPRINTK("waiting for mount name=%.*s", 273 DPRINTK("waiting for mount name=%.*s",
282 dentry->d_name.len, dentry->d_name.name); 274 dentry->d_name.len, dentry->d_name.name);
283 status = autofs4_wait(sbi, dentry, NFY_MOUNT); 275 status = autofs4_wait(sbi, dentry, NFY_MOUNT);
284 DPRINTK("mount wait done status=%d", status); 276 DPRINTK("mount wait done status=%d", status);
285 ino->last_used = jiffies;
286 return status;
287 } 277 }
288 return 0; 278 ino->last_used = jiffies;
279 return status;
289} 280}
290 281
291static int do_expire_wait(struct dentry *dentry) 282static int do_expire_wait(struct dentry *dentry)
@@ -319,9 +310,12 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path)
319 */ 310 */
320 if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) { 311 if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) {
321 struct dentry *parent = dentry->d_parent; 312 struct dentry *parent = dentry->d_parent;
313 struct autofs_info *ino;
322 struct dentry *new = d_lookup(parent, &dentry->d_name); 314 struct dentry *new = d_lookup(parent, &dentry->d_name);
323 if (!new) 315 if (!new)
324 return NULL; 316 return NULL;
317 ino = autofs4_dentry_ino(new);
318 ino->last_used = jiffies;
325 dput(path->dentry); 319 dput(path->dentry);
326 path->dentry = new; 320 path->dentry = new;
327 } 321 }
@@ -338,18 +332,6 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
338 DPRINTK("dentry=%p %.*s", 332 DPRINTK("dentry=%p %.*s",
339 dentry, dentry->d_name.len, dentry->d_name.name); 333 dentry, dentry->d_name.len, dentry->d_name.name);
340 334
341 /*
342 * Someone may have manually umounted this or it was a submount
343 * that has gone away.
344 */
345 spin_lock(&dentry->d_lock);
346 if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
347 if (!(dentry->d_flags & DCACHE_MANAGE_TRANSIT) &&
348 (dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
349 __managed_dentry_set_transit(path->dentry);
350 }
351 spin_unlock(&dentry->d_lock);
352
353 /* The daemon never triggers a mount. */ 335 /* The daemon never triggers a mount. */
354 if (autofs4_oz_mode(sbi)) 336 if (autofs4_oz_mode(sbi))
355 return NULL; 337 return NULL;
@@ -418,18 +400,17 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
418done: 400done:
419 if (!(ino->flags & AUTOFS_INF_EXPIRING)) { 401 if (!(ino->flags & AUTOFS_INF_EXPIRING)) {
420 /* 402 /*
421 * Any needed mounting has been completed and the path updated 403 * Any needed mounting has been completed and the path
422 * so turn this into a normal dentry so we don't continually 404 * updated so clear DCACHE_NEED_AUTOMOUNT so we don't
423 * call ->d_automount() and ->d_manage(). 405 * call ->d_automount() on rootless multi-mounts since
424 */ 406 * it can lead to an incorrect ELOOP error return.
425 spin_lock(&dentry->d_lock); 407 *
426 __managed_dentry_clear_transit(dentry);
427 /*
428 * Only clear DMANAGED_AUTOMOUNT for rootless multi-mounts and 408 * Only clear DMANAGED_AUTOMOUNT for rootless multi-mounts and
429 * symlinks as in all other cases the dentry will be covered by 409 * symlinks as in all other cases the dentry will be covered by
430 * an actual mount so ->d_automount() won't be called during 410 * an actual mount so ->d_automount() won't be called during
431 * the follow. 411 * the follow.
432 */ 412 */
413 spin_lock(&dentry->d_lock);
433 if ((!d_mountpoint(dentry) && 414 if ((!d_mountpoint(dentry) &&
434 !list_empty(&dentry->d_subdirs)) || 415 !list_empty(&dentry->d_subdirs)) ||
435 (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode))) 416 (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode)))
@@ -455,6 +436,8 @@ int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
455 436
456 /* The daemon never waits. */ 437 /* The daemon never waits. */
457 if (autofs4_oz_mode(sbi)) { 438 if (autofs4_oz_mode(sbi)) {
439 if (rcu_walk)
440 return 0;
458 if (!d_mountpoint(dentry)) 441 if (!d_mountpoint(dentry))
459 return -EISDIR; 442 return -EISDIR;
460 return 0; 443 return 0;
@@ -612,12 +595,12 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
612 595
613 dir->i_mtime = CURRENT_TIME; 596 dir->i_mtime = CURRENT_TIME;
614 597
615 spin_lock(&autofs4_lock); 598 spin_lock(&sbi->lookup_lock);
616 autofs4_add_expiring(dentry); 599 __autofs4_add_expiring(dentry);
617 spin_lock(&dentry->d_lock); 600 spin_lock(&dentry->d_lock);
618 __d_drop(dentry); 601 __d_drop(dentry);
619 spin_unlock(&dentry->d_lock); 602 spin_unlock(&dentry->d_lock);
620 spin_unlock(&autofs4_lock); 603 spin_unlock(&sbi->lookup_lock);
621 604
622 return 0; 605 return 0;
623} 606}
@@ -629,7 +612,7 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
629 * set the DMANAGED_AUTOMOUNT and DMANAGED_TRANSIT flags on the leaves 612 * set the DMANAGED_AUTOMOUNT and DMANAGED_TRANSIT flags on the leaves
630 * of the directory tree. There is no need to clear the automount flag 613 * of the directory tree. There is no need to clear the automount flag
631 * following a mount or restore it after an expire because these mounts 614 * following a mount or restore it after an expire because these mounts
632 * are always covered. However, it is neccessary to ensure that these 615 * are always covered. However, it is necessary to ensure that these
633 * flags are clear on non-empty directories to avoid unnecessary calls 616 * flags are clear on non-empty directories to avoid unnecessary calls
634 * during path walks. 617 * during path walks.
635 */ 618 */
@@ -686,20 +669,17 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
686 if (!autofs4_oz_mode(sbi)) 669 if (!autofs4_oz_mode(sbi))
687 return -EACCES; 670 return -EACCES;
688 671
689 spin_lock(&autofs4_lock);
690 spin_lock(&sbi->lookup_lock); 672 spin_lock(&sbi->lookup_lock);
691 spin_lock(&dentry->d_lock); 673 spin_lock(&dentry->d_lock);
692 if (!list_empty(&dentry->d_subdirs)) { 674 if (!list_empty(&dentry->d_subdirs)) {
693 spin_unlock(&dentry->d_lock); 675 spin_unlock(&dentry->d_lock);
694 spin_unlock(&sbi->lookup_lock); 676 spin_unlock(&sbi->lookup_lock);
695 spin_unlock(&autofs4_lock);
696 return -ENOTEMPTY; 677 return -ENOTEMPTY;
697 } 678 }
698 __autofs4_add_expiring(dentry); 679 __autofs4_add_expiring(dentry);
699 spin_unlock(&sbi->lookup_lock);
700 __d_drop(dentry); 680 __d_drop(dentry);
701 spin_unlock(&dentry->d_lock); 681 spin_unlock(&dentry->d_lock);
702 spin_unlock(&autofs4_lock); 682 spin_unlock(&sbi->lookup_lock);
703 683
704 if (sbi->version < 5) 684 if (sbi->version < 5)
705 autofs_clear_leaf_automount_flags(dentry); 685 autofs_clear_leaf_automount_flags(dentry);
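
Every traversal in this file now takes only the per-superblock
sbi->lookup_lock, so two autofs mounts no longer serialize against each other
on one global lock. The recurring shape, sketched as a fragment:

    spin_lock(&sbi->lookup_lock);       /* one lock per autofs mount */
    list_for_each(p, &sbi->active_list) {
        /* ... inspect entries, taking d_lock nested inside ... */
    }
    spin_unlock(&sbi->lookup_lock);
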
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 56010056b2e6..25435987d6ae 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -197,12 +197,12 @@ rename_retry:
197 197
198 seq = read_seqbegin(&rename_lock); 198 seq = read_seqbegin(&rename_lock);
199 rcu_read_lock(); 199 rcu_read_lock();
200 spin_lock(&autofs4_lock); 200 spin_lock(&sbi->fs_lock);
201 for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent) 201 for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent)
202 len += tmp->d_name.len + 1; 202 len += tmp->d_name.len + 1;
203 203
204 if (!len || --len > NAME_MAX) { 204 if (!len || --len > NAME_MAX) {
205 spin_unlock(&autofs4_lock); 205 spin_unlock(&sbi->fs_lock);
206 rcu_read_unlock(); 206 rcu_read_unlock();
207 if (read_seqretry(&rename_lock, seq)) 207 if (read_seqretry(&rename_lock, seq))
208 goto rename_retry; 208 goto rename_retry;
@@ -218,7 +218,7 @@ rename_retry:
218 p -= tmp->d_name.len; 218 p -= tmp->d_name.len;
219 strncpy(p, tmp->d_name.name, tmp->d_name.len); 219 strncpy(p, tmp->d_name.name, tmp->d_name.len);
220 } 220 }
221 spin_unlock(&autofs4_lock); 221 spin_unlock(&sbi->fs_lock);
222 rcu_read_unlock(); 222 rcu_read_unlock();
223 if (read_seqretry(&rename_lock, seq)) 223 if (read_seqretry(&rename_lock, seq))
224 goto rename_retry; 224 goto rename_retry;
diff --git a/fs/befs/ChangeLog b/fs/befs/ChangeLog
index ce8c787916be..75a461cfaca6 100644
--- a/fs/befs/ChangeLog
+++ b/fs/befs/ChangeLog
@@ -24,7 +24,7 @@ Version 0.9 (2002-03-14)
24 24
25Version 0.64 (2002-02-07) 25Version 0.64 (2002-02-07)
26========== 26==========
27* Did the string comparision really right this time (btree.c) [WD] 27* Did the string comparison really right this time (btree.c) [WD]
28 28
29* Fixed up some places where I assumed that a long int could hold 29* Fixed up some places where I assumed that a long int could hold
30 a pointer value. (btree.c) [WD] 30 a pointer value. (btree.c) [WD]
@@ -114,7 +114,7 @@ Version 0.6 (2001-12-15)
114 More flexible. Will soon be controllable at mount time 114 More flexible. Will soon be controllable at mount time
115 (see TODO). [WD] 115 (see TODO). [WD]
116 116
117* Rewrote datastream positon lookups. 117* Rewrote datastream position lookups.
118 (datastream.c) [WD] 118 (datastream.c) [WD]
119 119
120* Moved the TODO list to its own file. 120* Moved the TODO list to its own file.
@@ -150,7 +150,7 @@ Version 0.50 (2001-11-13)
150* Anton also told me that the blocksize is not allowed to be larger than 150* Anton also told me that the blocksize is not allowed to be larger than
151 the page size in linux, which is 4k i386. Oops. Added a test for 151 the page size in linux, which is 4k i386. Oops. Added a test for
152 (blocksize > PAGE_SIZE), and refuse to mount in that case. What this 152 (blocksize > PAGE_SIZE), and refuse to mount in that case. What this
153 practicaly means is that 8k blocksize volumes won't work without a major 153 practically means is that 8k blocksize volumes won't work without a major
154 restructuring of the driver (or an alpha or other 64bit hardware). [WD] 154 restructuring of the driver (or an alpha or other 64bit hardware). [WD]
155 155
156* Cleaned up the befs_count_blocks() function. Much smarter now. 156* Cleaned up the befs_count_blocks() function. Much smarter now.
@@ -183,7 +183,7 @@ Version 0.45 (2001-10-29)
183 structures into the generic pointer fields of the public structures 183 structures into the generic pointer fields of the public structures
184 with kmalloc(). put_super and put_inode free them. This allows us not 184 with kmalloc(). put_super and put_inode free them. This allows us not
185 to have to touch the definitions of the public structures in 185 to have to touch the definitions of the public structures in
186 include/linux/fs.h. Also, befs_inode_info is huge (becuase of the 186 include/linux/fs.h. Also, befs_inode_info is huge (because of the
187 symlink string). (super.c, inode.c, befs_fs.h) [WD] 187 symlink string). (super.c, inode.c, befs_fs.h) [WD]
188 188
189* Fixed a thinko that was corrupting file reads after the first block_run 189* Fixed a thinko that was corrupting file reads after the first block_run
@@ -404,7 +404,7 @@ Version 0.4 (2001-10-28)
404 404
405* Fixed compile errors on 2.4.1 kernel (WD) 405* Fixed compile errors on 2.4.1 kernel (WD)
406 Resolve rejected patches 406 Resolve rejected patches
407 Accomodate changed NLS interface (util.h) 407 Accommodate changed NLS interface (util.h)
408 Needed to include <linux/slab.h> in most files 408 Needed to include <linux/slab.h> in most files
409 Makefile changes 409 Makefile changes
410 fs/Config.in changes 410 fs/Config.in changes
diff --git a/fs/befs/befs_fs_types.h b/fs/befs/befs_fs_types.h
index 7893eaa1e58c..eb557d9dc8be 100644
--- a/fs/befs/befs_fs_types.h
+++ b/fs/befs/befs_fs_types.h
@@ -234,7 +234,7 @@ typedef struct {
234} PACKED befs_btree_super; 234} PACKED befs_btree_super;
235 235
236/* 236/*
237 * Header stucture of each btree node 237 * Header structure of each btree node
238 */ 238 */
239typedef struct { 239typedef struct {
240 fs64 left; 240 fs64 left;
diff --git a/fs/befs/btree.c b/fs/befs/btree.c
index 4202db7496cb..a66c9b1136e0 100644
--- a/fs/befs/btree.c
+++ b/fs/befs/btree.c
@@ -5,7 +5,7 @@
5 * 5 *
6 * Licensed under the GNU GPL. See the file COPYING for details. 6 * Licensed under the GNU GPL. See the file COPYING for details.
7 * 7 *
8 * 2002-02-05: Sergey S. Kostyliov added binary search withing 8 * 2002-02-05: Sergey S. Kostyliov added binary search within
9 * btree nodes. 9 * btree nodes.
10 * 10 *
11 * Many thanks to: 11 * Many thanks to:
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index b1d0c794747b..54b8c28bebc8 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -75,7 +75,6 @@ static const struct inode_operations befs_dir_inode_operations = {
75 75
76static const struct address_space_operations befs_aops = { 76static const struct address_space_operations befs_aops = {
77 .readpage = befs_readpage, 77 .readpage = befs_readpage,
78 .sync_page = block_sync_page,
79 .bmap = befs_bmap, 78 .bmap = befs_bmap,
80}; 79};
81 80
@@ -735,7 +734,7 @@ parse_options(char *options, befs_mount_options * opts)
735 734
736/* This function has the responsibiltiy of getting the 735/* This function has the responsibiltiy of getting the
737 * filesystem ready for unmounting. 736 * filesystem ready for unmounting.
738 * Basicly, we free everything that we allocated in 737 * Basically, we free everything that we allocated in
739 * befs_read_inode 738 * befs_read_inode
740 */ 739 */
741static void 740static void
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 685ecff3ab31..b14cebfd9047 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -97,7 +97,7 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, int mode,
97 if (!inode) 97 if (!inode)
98 return -ENOSPC; 98 return -ENOSPC;
99 mutex_lock(&info->bfs_lock); 99 mutex_lock(&info->bfs_lock);
100 ino = find_first_zero_bit(info->si_imap, info->si_lasti); 100 ino = find_first_zero_bit(info->si_imap, info->si_lasti + 1);
101 if (ino > info->si_lasti) { 101 if (ino > info->si_lasti) {
102 mutex_unlock(&info->bfs_lock); 102 mutex_unlock(&info->bfs_lock);
103 iput(inode); 103 iput(inode);
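
find_first_zero_bit() takes the number of bits to search, not the highest
valid index. With si_lasti as the size, a completely full inode map returns
si_lasti, which the following "ino > info->si_lasti" test does not catch, so
an in-use inode could be handed out again. A runnable user-space model of the
semantics:

    #include <stdio.h>

    /* Model of find_first_zero_bit(): scan 'size' bits, return 'size'
     * when every searched bit is set. */
    static unsigned find_first_zero_bit(const unsigned char *map,
                                        unsigned size)
    {
        unsigned i;

        for (i = 0; i < size; i++)
            if (!(map[i / 8] & (1u << (i % 8))))
                return i;
        return size;
    }

    int main(void)
    {
        unsigned char imap[1] = { 0x07 };   /* inodes 0..2 all in use */
        unsigned lasti = 2;

        /* Old call returns 2 (not caught by "> lasti"); fixed call
         * returns 3, which correctly reports a full filesystem. */
        printf("%u %u\n", find_first_zero_bit(imap, lasti),
               find_first_zero_bit(imap, lasti + 1));
        return 0;
    }
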
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index eb67edd0f8ea..f20e8a71062f 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -186,7 +186,6 @@ static sector_t bfs_bmap(struct address_space *mapping, sector_t block)
186const struct address_space_operations bfs_aops = { 186const struct address_space_operations bfs_aops = {
187 .readpage = bfs_readpage, 187 .readpage = bfs_readpage,
188 .writepage = bfs_writepage, 188 .writepage = bfs_writepage,
189 .sync_page = block_sync_page,
190 .write_begin = bfs_write_begin, 189 .write_begin = bfs_write_begin,
191 .write_end = generic_write_end, 190 .write_end = generic_write_end,
192 .bmap = bfs_bmap, 191 .bmap = bfs_bmap,
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index d5b640ba6cb1..303983fabfd6 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -570,7 +570,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
570 unsigned long elf_entry; 570 unsigned long elf_entry;
571 unsigned long interp_load_addr = 0; 571 unsigned long interp_load_addr = 0;
572 unsigned long start_code, end_code, start_data, end_data; 572 unsigned long start_code, end_code, start_data, end_data;
573 unsigned long reloc_func_desc = 0; 573 unsigned long reloc_func_desc __maybe_unused = 0;
574 int executable_stack = EXSTACK_DEFAULT; 574 int executable_stack = EXSTACK_DEFAULT;
575 unsigned long def_flags = 0; 575 unsigned long def_flags = 0;
576 struct { 576 struct {
@@ -941,9 +941,13 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
941 current->mm->start_stack = bprm->p; 941 current->mm->start_stack = bprm->p;
942 942
943#ifdef arch_randomize_brk 943#ifdef arch_randomize_brk
944 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) 944 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
945 current->mm->brk = current->mm->start_brk = 945 current->mm->brk = current->mm->start_brk =
946 arch_randomize_brk(current->mm); 946 arch_randomize_brk(current->mm);
947#ifdef CONFIG_COMPAT_BRK
948 current->brk_randomized = 1;
949#endif
950 }
947#endif 951#endif
948 952
949 if (current->personality & MMAP_PAGE_ZERO) { 953 if (current->personality & MMAP_PAGE_ZERO) {
@@ -1906,7 +1910,7 @@ static int elf_core_dump(struct coredump_params *cprm)
1906 segs = current->mm->map_count; 1910 segs = current->mm->map_count;
1907 segs += elf_core_extra_phdrs(); 1911 segs += elf_core_extra_phdrs();
1908 1912
1909 gate_vma = get_gate_vma(current); 1913 gate_vma = get_gate_vma(current->mm);
1910 if (gate_vma != NULL) 1914 if (gate_vma != NULL)
1911 segs++; 1915 segs++;
1912 1916
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 811384bec8de..397d3057d336 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -717,7 +717,7 @@ static int load_flat_file(struct linux_binprm * bprm,
717 * help simplify all this mumbo jumbo 717 * help simplify all this mumbo jumbo
718 * 718 *
719 * We've got two different sections of relocation entries. 719 * We've got two different sections of relocation entries.
720 * The first is the GOT which resides at the begining of the data segment 720 * The first is the GOT which resides at the beginning of the data segment
721 * and is terminated with a -1. This one can be relocated in place. 721 * and is terminated with a -1. This one can be relocated in place.
722 * The second is the extra relocation entries tacked after the image's 722 * The second is the extra relocation entries tacked after the image's
723 * data segment. These require a little more processing as the entry is 723 * data segment. These require a little more processing as the entry is
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index e49cce234c65..9c5e6b2cd11a 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -761,6 +761,9 @@ int bioset_integrity_create(struct bio_set *bs, int pool_size)
761{ 761{
762 unsigned int max_slab = vecs_to_idx(BIO_MAX_PAGES); 762 unsigned int max_slab = vecs_to_idx(BIO_MAX_PAGES);
763 763
764 if (bs->bio_integrity_pool)
765 return 0;
766
764 bs->bio_integrity_pool = 767 bs->bio_integrity_pool =
765 mempool_create_slab_pool(pool_size, bip_slab[max_slab].slab); 768 mempool_create_slab_pool(pool_size, bip_slab[max_slab].slab);
766 769
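
Making bioset_integrity_create() idempotent lets a caller request the pool
without tracking whether an earlier call already built it; creating it twice
would leak the first mempool. The guard, as a fragment:

    if (bs->bio_integrity_pool)
        return 0;       /* already created for this bio_set: nothing to do */
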
diff --git a/fs/bio.c b/fs/bio.c
index 4bd454fa844e..840a0d755248 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -43,7 +43,7 @@ static mempool_t *bio_split_pool __read_mostly;
43 * unsigned short 43 * unsigned short
44 */ 44 */
45#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) } 45#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
46struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = { 46static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
47 BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES), 47 BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
48}; 48};
49#undef BV 49#undef BV
@@ -111,7 +111,7 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
111 if (!slab) 111 if (!slab)
112 goto out_unlock; 112 goto out_unlock;
113 113
114 printk("bio: create slab <%s> at %d\n", bslab->name, entry); 114 printk(KERN_INFO "bio: create slab <%s> at %d\n", bslab->name, entry);
115 bslab->slab = slab; 115 bslab->slab = slab;
116 bslab->slab_ref = 1; 116 bslab->slab_ref = 1;
117 bslab->slab_size = sz; 117 bslab->slab_size = sz;
@@ -1436,7 +1436,7 @@ EXPORT_SYMBOL(bio_flush_dcache_pages);
1436 * preferred way to end I/O on a bio, it takes care of clearing 1436 * preferred way to end I/O on a bio, it takes care of clearing
1437 * BIO_UPTODATE on error. @error is 0 on success, and and one of the 1437 * BIO_UPTODATE on error. @error is 0 on success, and and one of the
1438 * established -Exxxx (-EIO, for instance) error values in case 1438 * established -Exxxx (-EIO, for instance) error values in case
1439 * something went wrong. Noone should call bi_end_io() directly on a 1439 * something went wrong. No one should call bi_end_io() directly on a
1440 * bio unless they own it and thus know that it has an end_io 1440 * bio unless they own it and thus know that it has an end_io
1441 * function. 1441 * function.
1442 **/ 1442 **/
@@ -1636,9 +1636,6 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
1636 if (!bs->bio_pool) 1636 if (!bs->bio_pool)
1637 goto bad; 1637 goto bad;
1638 1638
1639 if (bioset_integrity_create(bs, pool_size))
1640 goto bad;
1641
1642 if (!biovec_create_pools(bs, pool_size)) 1639 if (!biovec_create_pools(bs, pool_size))
1643 return bs; 1640 return bs;
1644 1641
@@ -1656,12 +1653,10 @@ static void __init biovec_init_slabs(void)
1656 int size; 1653 int size;
1657 struct biovec_slab *bvs = bvec_slabs + i; 1654 struct biovec_slab *bvs = bvec_slabs + i;
1658 1655
1659#ifndef CONFIG_BLK_DEV_INTEGRITY
1660 if (bvs->nr_vecs <= BIO_INLINE_VECS) { 1656 if (bvs->nr_vecs <= BIO_INLINE_VECS) {
1661 bvs->slab = NULL; 1657 bvs->slab = NULL;
1662 continue; 1658 continue;
1663 } 1659 }
1664#endif
1665 1660
1666 size = bvs->nr_vecs * sizeof(struct bio_vec); 1661 size = bvs->nr_vecs * sizeof(struct bio_vec);
1667 bvs->slab = kmem_cache_create(bvs->name, size, 0, 1662 bvs->slab = kmem_cache_create(bvs->name, size, 0,
@@ -1684,6 +1679,9 @@ static int __init init_bio(void)
1684 if (!fs_bio_set) 1679 if (!fs_bio_set)
1685 panic("bio: can't allocate bios\n"); 1680 panic("bio: can't allocate bios\n");
1686 1681
1682 if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
1683 panic("bio: can't create integrity pool\n");
1684
1687 bio_split_pool = mempool_create_kmalloc_pool(BIO_SPLIT_ENTRIES, 1685 bio_split_pool = mempool_create_kmalloc_pool(BIO_SPLIT_ENTRIES,
1688 sizeof(struct bio_pair)); 1686 sizeof(struct bio_pair));
1689 if (!bio_split_pool) 1687 if (!bio_split_pool)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 889287019599..5147bdd3b8e1 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -55,11 +55,13 @@ EXPORT_SYMBOL(I_BDEV);
55static void bdev_inode_switch_bdi(struct inode *inode, 55static void bdev_inode_switch_bdi(struct inode *inode,
56 struct backing_dev_info *dst) 56 struct backing_dev_info *dst)
57{ 57{
58 spin_lock(&inode_lock); 58 spin_lock(&inode_wb_list_lock);
59 spin_lock(&inode->i_lock);
59 inode->i_data.backing_dev_info = dst; 60 inode->i_data.backing_dev_info = dst;
60 if (inode->i_state & I_DIRTY) 61 if (inode->i_state & I_DIRTY)
61 list_move(&inode->i_wb_list, &dst->wb.b_dirty); 62 list_move(&inode->i_wb_list, &dst->wb.b_dirty);
62 spin_unlock(&inode_lock); 63 spin_unlock(&inode->i_lock);
64 spin_unlock(&inode_wb_list_lock);
63} 65}
64 66
65static sector_t max_block(struct block_device *bdev) 67static sector_t max_block(struct block_device *bdev)
@@ -651,7 +653,7 @@ void bd_forget(struct inode *inode)
651 * @whole: whole block device containing @bdev, may equal @bdev 653 * @whole: whole block device containing @bdev, may equal @bdev
652 * @holder: holder trying to claim @bdev 654 * @holder: holder trying to claim @bdev
653 * 655 *
654 * Test whther @bdev can be claimed by @holder. 656 * Test whether @bdev can be claimed by @holder.
655 * 657 *
656 * CONTEXT: 658 * CONTEXT:
657 * spin_lock(&bdev_lock). 659 * spin_lock(&bdev_lock).
@@ -1087,6 +1089,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1087 if (!disk) 1089 if (!disk)
1088 goto out; 1090 goto out;
1089 1091
1092 disk_block_events(disk);
1090 mutex_lock_nested(&bdev->bd_mutex, for_part); 1093 mutex_lock_nested(&bdev->bd_mutex, for_part);
1091 if (!bdev->bd_openers) { 1094 if (!bdev->bd_openers) {
1092 bdev->bd_disk = disk; 1095 bdev->bd_disk = disk;
@@ -1108,10 +1111,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1108 */ 1111 */
1109 disk_put_part(bdev->bd_part); 1112 disk_put_part(bdev->bd_part);
1110 bdev->bd_part = NULL; 1113 bdev->bd_part = NULL;
1111 module_put(disk->fops->owner);
1112 put_disk(disk);
1113 bdev->bd_disk = NULL; 1114 bdev->bd_disk = NULL;
1114 mutex_unlock(&bdev->bd_mutex); 1115 mutex_unlock(&bdev->bd_mutex);
1116 disk_unblock_events(disk);
1117 module_put(disk->fops->owner);
1118 put_disk(disk);
1115 goto restart; 1119 goto restart;
1116 } 1120 }
1117 if (ret) 1121 if (ret)
@@ -1148,9 +1152,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1148 bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); 1152 bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
1149 } 1153 }
1150 } else { 1154 } else {
1151 module_put(disk->fops->owner);
1152 put_disk(disk);
1153 disk = NULL;
1154 if (bdev->bd_contains == bdev) { 1155 if (bdev->bd_contains == bdev) {
1155 if (bdev->bd_disk->fops->open) { 1156 if (bdev->bd_disk->fops->open) {
1156 ret = bdev->bd_disk->fops->open(bdev, mode); 1157 ret = bdev->bd_disk->fops->open(bdev, mode);
@@ -1160,11 +1161,15 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1160 if (bdev->bd_invalidated) 1161 if (bdev->bd_invalidated)
1161 rescan_partitions(bdev->bd_disk, bdev); 1162 rescan_partitions(bdev->bd_disk, bdev);
1162 } 1163 }
1164 /* only one opener holds refs to the module and disk */
1165 module_put(disk->fops->owner);
1166 put_disk(disk);
1163 } 1167 }
1164 bdev->bd_openers++; 1168 bdev->bd_openers++;
1165 if (for_part) 1169 if (for_part)
1166 bdev->bd_part_count++; 1170 bdev->bd_part_count++;
1167 mutex_unlock(&bdev->bd_mutex); 1171 mutex_unlock(&bdev->bd_mutex);
1172 disk_unblock_events(disk);
1168 return 0; 1173 return 0;
1169 1174
1170 out_clear: 1175 out_clear:
@@ -1177,10 +1182,10 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1177 bdev->bd_contains = NULL; 1182 bdev->bd_contains = NULL;
1178 out_unlock_bdev: 1183 out_unlock_bdev:
1179 mutex_unlock(&bdev->bd_mutex); 1184 mutex_unlock(&bdev->bd_mutex);
1180 out: 1185 disk_unblock_events(disk);
1181 if (disk) 1186 module_put(disk->fops->owner);
1182 module_put(disk->fops->owner);
1183 put_disk(disk); 1187 put_disk(disk);
1188 out:
1184 bdput(bdev); 1189 bdput(bdev);
1185 1190
1186 return ret; 1191 return ret;
@@ -1446,14 +1451,13 @@ int blkdev_put(struct block_device *bdev, fmode_t mode)
1446 if (bdev_free) { 1451 if (bdev_free) {
1447 if (bdev->bd_write_holder) { 1452 if (bdev->bd_write_holder) {
1448 disk_unblock_events(bdev->bd_disk); 1453 disk_unblock_events(bdev->bd_disk);
1449 bdev->bd_write_holder = false;
1450 } else
1451 disk_check_events(bdev->bd_disk); 1454 disk_check_events(bdev->bd_disk);
1455 bdev->bd_write_holder = false;
1456 }
1452 } 1457 }
1453 1458
1454 mutex_unlock(&bdev->bd_mutex); 1459 mutex_unlock(&bdev->bd_mutex);
1455 } else 1460 }
1456 disk_check_events(bdev->bd_disk);
1457 1461
1458 return __blkdev_put(bdev, mode, 0); 1462 return __blkdev_put(bdev, mode, 0);
1459} 1463}
@@ -1527,7 +1531,6 @@ static int blkdev_releasepage(struct page *page, gfp_t wait)
1527static const struct address_space_operations def_blk_aops = { 1531static const struct address_space_operations def_blk_aops = {
1528 .readpage = blkdev_readpage, 1532 .readpage = blkdev_readpage,
1529 .writepage = blkdev_writepage, 1533 .writepage = blkdev_writepage,
1530 .sync_page = block_sync_page,
1531 .write_begin = blkdev_write_begin, 1534 .write_begin = blkdev_write_begin,
1532 .write_end = blkdev_write_end, 1535 .write_end = blkdev_write_end,
1533 .writepages = generic_writepages, 1536 .writepages = generic_writepages,
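
__blkdev_get() now brackets the whole open path with disk event blocking, so
media-change polling cannot fire while the device is half-opened, and the
module and disk references are dropped only after events are unblocked. The
bracketing, sketched as a kernel-internal fragment:

    disk_block_events(disk);            /* quiesce event polling */
    mutex_lock_nested(&bdev->bd_mutex, for_part);
    /* ... open the device, rescan partitions on success ... */
    mutex_unlock(&bdev->bd_mutex);
    disk_unblock_events(disk);          /* resume polling */
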
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 9c949348510b..5d505aaa72fb 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -170,7 +170,7 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
 	int ret;
 	struct posix_acl *acl = NULL;

-	if (!is_owner_or_cap(dentry->d_inode))
+	if (!inode_owner_or_capable(dentry->d_inode))
 		return -EPERM;

 	if (!IS_POSIXACL(dentry->d_inode))
@@ -178,16 +178,17 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,

 	if (value) {
 		acl = posix_acl_from_xattr(value, size);
-		if (acl == NULL) {
-			value = NULL;
-			size = 0;
+		if (acl) {
+			ret = posix_acl_valid(acl);
+			if (ret)
+				goto out;
 		} else if (IS_ERR(acl)) {
 			return PTR_ERR(acl);
 		}
 	}

 	ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type);
-
+out:
 	posix_acl_release(acl);

 	return ret;
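The acl.c hunk converges on a common C idiom: parse untrusted input, validate it before use, and route every exit through a single release label. A small userspace sketch, with parse_acl()/acl_valid() as invented stand-ins for the posix_acl helpers:

#include <errno.h>
#include <stdlib.h>

struct acl { int nentries; };

static struct acl *parse_acl(const void *value, size_t size)
{
	if (!value || !size)
		return NULL;                  /* "no ACL" is a valid result */
	struct acl *a = malloc(sizeof(*a));
	if (a)
		a->nentries = (int)(size / 8);
	return a;
}

static int acl_valid(const struct acl *a)
{
	return (a->nentries > 0 && a->nentries <= 32) ? 0 : -EINVAL;
}

int set_acl_from_xattr(const void *value, size_t size)
{
	struct acl *acl = NULL;
	int ret = 0;

	if (value) {
		acl = parse_acl(value, size);
		if (acl) {
			ret = acl_valid(acl); /* reject bad input early */
			if (ret)
				goto out;     /* still released below */
		}
	}

	/* ... apply the ACL here ... */
out:
	free(acl);                            /* one release path for all exits */
	return ret;
}

int main(void)
{
	char buf[16] = { 0 };
	return set_acl_from_xattr(buf, sizeof(buf)) ? 1 : 0;
}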
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index ccc991c542df..57c3bb2884ce 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -136,9 +136,8 @@ struct btrfs_inode {
 	 * items we think we'll end up using, and reserved_extents is the number
 	 * of extent items we've reserved metadata for.
 	 */
-	spinlock_t accounting_lock;
 	atomic_t outstanding_extents;
-	int reserved_extents;
+	atomic_t reserved_extents;

 	/*
 	 * ordered_data_close is set by truncate when a file that used
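The btrfs_inode.h change swaps a spinlock-protected int for an atomic counter, so readers and simple updates no longer take the lock. A userspace analogue with C11 atomics (not the kernel atomic_t API):

#include <stdatomic.h>
#include <stdio.h>

/* before: an int guarded by a lock; after: an atomic counter */
static atomic_int reserved_extents = 0;

int main(void)
{
	atomic_fetch_add(&reserved_extents, 3);  /* reserve three extents */
	atomic_fetch_sub(&reserved_extents, 1);  /* release one */
	printf("%d\n", atomic_load(&reserved_extents)); /* prints 2 */
	return 0;
}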
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 4d2110eafe29..41d1d7c70e29 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -340,6 +340,8 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,

 	WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1));
 	cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
+	if (!cb)
+		return -ENOMEM;
 	atomic_set(&cb->pending_bios, 0);
 	cb->errors = 0;
 	cb->inode = inode;
@@ -354,6 +356,10 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;

 	bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
+	if (!bio) {
+		kfree(cb);
+		return -ENOMEM;
+	}
 	bio->bi_private = cb;
 	bio->bi_end_io = end_compressed_bio_write;
 	atomic_inc(&cb->pending_bios);
@@ -657,8 +663,9 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 			atomic_inc(&cb->pending_bios);

 			if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
-				btrfs_lookup_bio_sums(root, inode, comp_bio,
-						      sums);
+				ret = btrfs_lookup_bio_sums(root, inode,
+							    comp_bio, sums);
+				BUG_ON(ret);
 			}
 			sums += (comp_bio->bi_size + root->sectorsize - 1) /
 				root->sectorsize;
@@ -683,8 +690,10 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
 	BUG_ON(ret);

-	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
-		btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
+	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
+		ret = btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
+		BUG_ON(ret);
+	}

 	ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
 	BUG_ON(ret);
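The compression.c hunks add allocation-failure checks; the key detail is unwinding earlier allocations in reverse order when a later one fails. A userspace sketch with malloc/free standing in for kmalloc and compressed_bio_alloc:

#include <errno.h>
#include <stdlib.h>

struct cb  { int pending; };
struct bio { int size; };

int submit_compressed_write(void)
{
	struct cb *cb = malloc(sizeof(*cb));
	if (!cb)
		return -ENOMEM;          /* nothing to undo yet */

	struct bio *bio = malloc(sizeof(*bio));
	if (!bio) {
		free(cb);                /* undo the earlier allocation */
		return -ENOMEM;
	}

	/* ... build and submit the compressed write here ... */
	free(bio);
	free(cb);
	return 0;
}

int main(void) { return submit_compressed_write() ? 1 : 0; }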
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index b5baff0dccfe..84d7ca1fe0ba 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -147,10 +147,11 @@ noinline void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
 struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
 {
 	struct extent_buffer *eb;
-	spin_lock(&root->node_lock);
-	eb = root->node;
+
+	rcu_read_lock();
+	eb = rcu_dereference(root->node);
 	extent_buffer_get(eb);
-	spin_unlock(&root->node_lock);
+	rcu_read_unlock();
 	return eb;
 }

@@ -165,14 +166,8 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
 	while (1) {
 		eb = btrfs_root_node(root);
 		btrfs_tree_lock(eb);
-
-		spin_lock(&root->node_lock);
-		if (eb == root->node) {
-			spin_unlock(&root->node_lock);
+		if (eb == root->node)
 			break;
-		}
-		spin_unlock(&root->node_lock);
-
 		btrfs_tree_unlock(eb);
 		free_extent_buffer(eb);
 	}
@@ -458,10 +453,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 	else
 		parent_start = 0;

-	spin_lock(&root->node_lock);
-	root->node = cow;
 	extent_buffer_get(cow);
-	spin_unlock(&root->node_lock);
+	rcu_assign_pointer(root->node, cow);

 	btrfs_free_tree_block(trans, root, buf, parent_start,
 			      last_ref);
@@ -542,6 +535,9 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,

 	ret = __btrfs_cow_block(trans, root, buf, parent,
 				 parent_slot, cow_ret, search_start, 0);
+
+	trace_btrfs_cow_block(root, buf, *cow_ret);
+
 	return ret;
 }

@@ -686,6 +682,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 		if (!cur) {
 			cur = read_tree_block(root, blocknr,
 					      blocksize, gen);
+			if (!cur)
+				return -EIO;
 		} else if (!uptodate) {
 			btrfs_read_buffer(cur, gen);
 		}
@@ -732,122 +730,6 @@ static inline unsigned int leaf_data_end(struct btrfs_root *root,
 	return btrfs_item_offset_nr(leaf, nr - 1);
 }

-/*
- * extra debugging checks to make sure all the items in a key are
- * well formed and in the proper order
- */
-static int check_node(struct btrfs_root *root, struct btrfs_path *path,
-		      int level)
-{
-	struct extent_buffer *parent = NULL;
-	struct extent_buffer *node = path->nodes[level];
-	struct btrfs_disk_key parent_key;
-	struct btrfs_disk_key node_key;
-	int parent_slot;
-	int slot;
-	struct btrfs_key cpukey;
-	u32 nritems = btrfs_header_nritems(node);
-
-	if (path->nodes[level + 1])
-		parent = path->nodes[level + 1];
-
-	slot = path->slots[level];
-	BUG_ON(nritems == 0);
-	if (parent) {
-		parent_slot = path->slots[level + 1];
-		btrfs_node_key(parent, &parent_key, parent_slot);
-		btrfs_node_key(node, &node_key, 0);
-		BUG_ON(memcmp(&parent_key, &node_key,
-			      sizeof(struct btrfs_disk_key)));
-		BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
-		       btrfs_header_bytenr(node));
-	}
-	BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
-	if (slot != 0) {
-		btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
-		btrfs_node_key(node, &node_key, slot);
-		BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
-	}
-	if (slot < nritems - 1) {
-		btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
-		btrfs_node_key(node, &node_key, slot);
-		BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
-	}
-	return 0;
-}
-
-/*
- * extra checking to make sure all the items in a leaf are
- * well formed and in the proper order
- */
-static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
-		      int level)
-{
-	struct extent_buffer *leaf = path->nodes[level];
-	struct extent_buffer *parent = NULL;
-	int parent_slot;
-	struct btrfs_key cpukey;
-	struct btrfs_disk_key parent_key;
-	struct btrfs_disk_key leaf_key;
-	int slot = path->slots[0];
-
-	u32 nritems = btrfs_header_nritems(leaf);
-
-	if (path->nodes[level + 1])
-		parent = path->nodes[level + 1];
-
-	if (nritems == 0)
-		return 0;
-
-	if (parent) {
-		parent_slot = path->slots[level + 1];
-		btrfs_node_key(parent, &parent_key, parent_slot);
-		btrfs_item_key(leaf, &leaf_key, 0);
-
-		BUG_ON(memcmp(&parent_key, &leaf_key,
-		       sizeof(struct btrfs_disk_key)));
-		BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
-		       btrfs_header_bytenr(leaf));
-	}
-	if (slot != 0 && slot < nritems - 1) {
-		btrfs_item_key(leaf, &leaf_key, slot);
-		btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
-		if (comp_keys(&leaf_key, &cpukey) <= 0) {
-			btrfs_print_leaf(root, leaf);
-			printk(KERN_CRIT "slot %d offset bad key\n", slot);
-			BUG_ON(1);
-		}
-		if (btrfs_item_offset_nr(leaf, slot - 1) !=
-		    btrfs_item_end_nr(leaf, slot)) {
-			btrfs_print_leaf(root, leaf);
-			printk(KERN_CRIT "slot %d offset bad\n", slot);
-			BUG_ON(1);
-		}
-	}
-	if (slot < nritems - 1) {
-		btrfs_item_key(leaf, &leaf_key, slot);
-		btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
-		BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
-		if (btrfs_item_offset_nr(leaf, slot) !=
-		    btrfs_item_end_nr(leaf, slot + 1)) {
-			btrfs_print_leaf(root, leaf);
-			printk(KERN_CRIT "slot %d offset bad\n", slot);
-			BUG_ON(1);
-		}
-	}
-	BUG_ON(btrfs_item_offset_nr(leaf, 0) +
-	       btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
-	return 0;
-}
-
-static noinline int check_block(struct btrfs_root *root,
-				struct btrfs_path *path, int level)
-{
-	return 0;
-	if (level == 0)
-		return check_leaf(root, path, level);
-	return check_node(root, path, level);
-}
-

 /*
  * search for key in the extent_buffer.  The items start at offset p,
@@ -1046,9 +928,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 			goto enospc;
 		}

-		spin_lock(&root->node_lock);
-		root->node = child;
-		spin_unlock(&root->node_lock);
+		rcu_assign_pointer(root->node, child);

 		add_root_to_dirty_list(root);
 		btrfs_tree_unlock(child);
@@ -1188,7 +1068,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		}
 	}
 	/* double check we haven't messed things up */
-	check_block(root, path, level);
 	if (orig_ptr !=
 	    btrfs_node_blockptr(path->nodes[level], path->slots[level]))
 		BUG();
@@ -1798,12 +1677,6 @@ cow_done:
 		if (!cow)
 			btrfs_unlock_up_safe(p, level + 1);

-		ret = check_block(root, p, level);
-		if (ret) {
-			ret = -1;
-			goto done;
-		}
-
 		ret = bin_search(b, key, level, &slot);

 		if (level != 0) {
@@ -2130,10 +2003,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,

 	btrfs_mark_buffer_dirty(c);

-	spin_lock(&root->node_lock);
 	old = root->node;
-	root->node = c;
-	spin_unlock(&root->node_lock);
+	rcu_assign_pointer(root->node, c);

 	/* the super has an extra ref to root->node */
 	free_extent_buffer(old);
@@ -3840,7 +3711,8 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
 	unsigned long ptr;

 	path = btrfs_alloc_path();
-	BUG_ON(!path);
+	if (!path)
+		return -ENOMEM;
 	ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
 	if (!ret) {
 		leaf = path->nodes[0];
@@ -4217,6 +4089,7 @@ find_next_key:
 		}
 		btrfs_set_path_blocking(path);
 		cur = read_node_slot(root, cur, slot);
+		BUG_ON(!cur);

 		btrfs_tree_lock(cur);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 7f78cc78fdd0..2e61fe1b6b8c 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -28,6 +28,7 @@
 #include <linux/wait.h>
 #include <linux/slab.h>
 #include <linux/kobject.h>
+#include <trace/events/btrfs.h>
 #include <asm/kmap_types.h>
 #include "extent_io.h"
 #include "extent_map.h"
@@ -40,6 +41,7 @@ extern struct kmem_cache *btrfs_trans_handle_cachep;
 extern struct kmem_cache *btrfs_transaction_cachep;
 extern struct kmem_cache *btrfs_bit_radix_cachep;
 extern struct kmem_cache *btrfs_path_cachep;
+extern struct kmem_cache *btrfs_free_space_cachep;
 struct btrfs_ordered_sum;

 #define BTRFS_MAGIC "_BHRfS_M"
@@ -738,8 +740,10 @@ struct btrfs_space_info {
 	 */
 	unsigned long reservation_progress;

-	int full;		/* indicates that we cannot allocate any more
+	int full:1;		/* indicates that we cannot allocate any more
 				   chunks for this space */
+	int chunk_alloc:1;	/* set if we are allocating a chunk */
+
 	int force_alloc;	/* set if we need to force a chunk alloc for
 				   this space */

@@ -782,9 +786,6 @@ struct btrfs_free_cluster {
 	/* first extent starting offset */
 	u64 window_start;

-	/* if this cluster simply points at a bitmap in the block group */
-	bool points_to_bitmap;
-
 	struct btrfs_block_group_cache *block_group;
 	/*
 	 * when a cluster is allocated from a block group, we put the
@@ -1283,6 +1284,9 @@ struct btrfs_root {
 #define BTRFS_INODE_NODUMP		(1 << 8)
 #define BTRFS_INODE_NOATIME		(1 << 9)
 #define BTRFS_INODE_DIRSYNC		(1 << 10)
+#define BTRFS_INODE_COMPRESS		(1 << 11)
+
+#define BTRFS_INODE_ROOT_ITEM_INIT	(1 << 31)

 /* some macros to generate set/get funcs for the struct fields.  This
  * assumes there is a lefoo_to_cpu for every type, so lets make a simple
@@ -2157,6 +2161,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
 		      u64 root_objectid, u64 owner, u64 offset);

 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
+int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
+				u64 num_bytes, int reserve, int sinfo);
 int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
 				struct btrfs_root *root);
 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
@@ -2227,10 +2233,12 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
 int btrfs_error_unpin_extent_range(struct btrfs_root *root,
 				   u64 start, u64 end);
 int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
-			       u64 num_bytes);
+			       u64 num_bytes, u64 *actual_bytes);
 int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
 			    struct btrfs_root *root, u64 type);
+int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range);

+int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
 		     int level, int *slot);
@@ -2355,6 +2363,8 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
 int btrfs_find_orphan_roots(struct btrfs_root *tree_root);
 int btrfs_set_root_node(struct btrfs_root_item *item,
 			struct extent_buffer *node);
+void btrfs_check_and_init_root_item(struct btrfs_root_item *item);
+
 /* dir-item.c */
 int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root, const char *name,
@@ -2392,6 +2402,9 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
 					  struct btrfs_path *path, u64 dir,
 					  const char *name, u16 name_len,
 					  int mod);
+int verify_dir_item(struct btrfs_root *root,
+		    struct extent_buffer *leaf,
+		    struct btrfs_dir_item *dir_item);

 /* orphan.c */
 int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
@@ -2528,7 +2541,7 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans,
 			 struct inode *inode);
 int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode);
 int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
-void btrfs_orphan_cleanup(struct btrfs_root *root);
+int btrfs_orphan_cleanup(struct btrfs_root *root);
 void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans,
 			       struct btrfs_pending_snapshot *pending,
 			       u64 *bytes_to_reserve);
@@ -2536,7 +2549,7 @@ void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans,
 			       struct btrfs_pending_snapshot *pending);
 void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root);
-int btrfs_cont_expand(struct inode *inode, loff_t size);
+int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size);
 int btrfs_invalidate_inodes(struct btrfs_root *root);
 void btrfs_add_delayed_iput(struct inode *inode);
 void btrfs_run_delayed_iputs(struct btrfs_root *root);
@@ -2565,6 +2578,11 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
 int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
 			      struct inode *inode, u64 start, u64 end);
 int btrfs_release_file(struct inode *inode, struct file *file);
+void btrfs_drop_pages(struct page **pages, size_t num_pages);
+int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
+		      struct page **pages, size_t num_pages,
+		      loff_t pos, size_t write_bytes,
+		      struct extent_state **cached);

 /* tree-defrag.c */
 int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index e807b143b857..bce28f653899 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -483,6 +483,8 @@ static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans,
 	INIT_LIST_HEAD(&head_ref->cluster);
 	mutex_init(&head_ref->mutex);

+	trace_btrfs_delayed_ref_head(ref, head_ref, action);
+
 	existing = tree_insert(&delayed_refs->root, &ref->rb_node);

 	if (existing) {
@@ -537,6 +539,8 @@ static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans,
 	}
 	full_ref->level = level;

+	trace_btrfs_delayed_tree_ref(ref, full_ref, action);
+
 	existing = tree_insert(&delayed_refs->root, &ref->rb_node);

 	if (existing) {
@@ -591,6 +595,8 @@ static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans,
 	full_ref->objectid = owner;
 	full_ref->offset = offset;

+	trace_btrfs_delayed_data_ref(ref, full_ref, action);
+
 	existing = tree_insert(&delayed_refs->root, &ref->rb_node);

 	if (existing) {
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index f0cad5ae5be7..c62f02f6ae69 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -151,7 +151,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
 		ret = PTR_ERR(dir_item);
 		if (ret == -EEXIST)
 			goto second_insert;
-		goto out;
+		goto out_free;
 	}

 	leaf = path->nodes[0];
@@ -170,7 +170,7 @@ second_insert:
 	/* FIXME, use some real flag for selecting the extra index */
 	if (root == root->fs_info->tree_root) {
 		ret = 0;
-		goto out;
+		goto out_free;
 	}
 	btrfs_release_path(root, path);

@@ -180,7 +180,7 @@ second_insert:
 					      name, name_len);
 	if (IS_ERR(dir_item)) {
 		ret2 = PTR_ERR(dir_item);
-		goto out;
+		goto out_free;
 	}
 	leaf = path->nodes[0];
 	btrfs_cpu_key_to_disk(&disk_key, location);
@@ -192,7 +192,9 @@ second_insert:
 	name_ptr = (unsigned long)(dir_item + 1);
 	write_extent_buffer(leaf, name, name_ptr, name_len);
 	btrfs_mark_buffer_dirty(leaf);
-out:
+
+out_free:
+
 	btrfs_free_path(path);
 	if (ret)
 		return ret;
@@ -377,6 +379,9 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,

 	leaf = path->nodes[0];
 	dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
+	if (verify_dir_item(root, leaf, dir_item))
+		return NULL;
+
 	total_len = btrfs_item_size_nr(leaf, path->slots[0]);
 	while (cur < total_len) {
 		this_len = sizeof(*dir_item) +
@@ -429,3 +434,35 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
 	}
 	return ret;
 }
+
+int verify_dir_item(struct btrfs_root *root,
+		    struct extent_buffer *leaf,
+		    struct btrfs_dir_item *dir_item)
+{
+	u16 namelen = BTRFS_NAME_LEN;
+	u8 type = btrfs_dir_type(leaf, dir_item);
+
+	if (type >= BTRFS_FT_MAX) {
+		printk(KERN_CRIT "btrfs: invalid dir item type: %d\n",
+		       (int)type);
+		return 1;
+	}
+
+	if (type == BTRFS_FT_XATTR)
+		namelen = XATTR_NAME_MAX;
+
+	if (btrfs_dir_name_len(leaf, dir_item) > namelen) {
+		printk(KERN_CRIT "btrfs: invalid dir item name len: %u\n",
+		       (unsigned)btrfs_dir_data_len(leaf, dir_item));
+		return 1;
+	}
+
+	/* BTRFS_MAX_XATTR_SIZE is the same for all dir items */
+	if (btrfs_dir_data_len(leaf, dir_item) > BTRFS_MAX_XATTR_SIZE(root)) {
+		printk(KERN_CRIT "btrfs: invalid dir item data len: %u\n",
+		       (unsigned)btrfs_dir_data_len(leaf, dir_item));
+		return 1;
+	}
+
+	return 0;
+}
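verify_dir_item() above is a bounds check on an untrusted on-disk record before any of its fields are used. A self-contained sketch of the same idea; the struct layout and limits are invented for illustration, not the btrfs on-disk format:

#include <stdint.h>
#include <stdio.h>

#define FT_MAX       9
#define NAME_MAX_LEN 255
#define DATA_MAX_LEN 4096

struct dir_item {
	uint8_t  type;
	uint16_t name_len;
	uint16_t data_len;
};

static int verify_item(const struct dir_item *it)
{
	if (it->type >= FT_MAX)
		return 1;                 /* unknown type byte */
	if (it->name_len > NAME_MAX_LEN)
		return 1;                 /* name would overrun the item */
	if (it->data_len > DATA_MAX_LEN)
		return 1;                 /* payload too large to be valid */
	return 0;
}

int main(void)
{
	struct dir_item ok  = { 1, 12, 0 };
	struct dir_item bad = { 200, 12, 0 };
	printf("%d %d\n", verify_item(&ok), verify_item(&bad)); /* 0 1 */
	return 0;
}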
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 100b07f021b4..68c84c8c24bd 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -29,6 +29,7 @@
 #include <linux/crc32c.h>
 #include <linux/slab.h>
 #include <linux/migrate.h>
+#include <asm/unaligned.h>
 #include "compat.h"
 #include "ctree.h"
 #include "disk-io.h"
@@ -198,7 +199,7 @@ u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)

 void btrfs_csum_final(u32 crc, char *result)
 {
-	*(__le32 *)result = ~cpu_to_le32(crc);
+	put_unaligned_le32(~crc, result);
 }

 /*
@@ -323,6 +324,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
 	int num_copies = 0;
 	int mirror_num = 0;

+	clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
 	io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
 	while (1) {
 		ret = read_extent_buffer_pages(io_tree, eb, start, 1,
@@ -331,6 +333,14 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
 		    !verify_parent_transid(io_tree, eb, parent_transid))
 			return ret;

+		/*
+		 * This buffer's crc is fine, but its contents are corrupted, so
+		 * there is no reason to read the other copies, they won't be
+		 * any less wrong.
+		 */
+		if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
+			return ret;
+
 		num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
 					      eb->start, eb->len);
 		if (num_copies == 1)
@@ -419,6 +429,73 @@ static int check_tree_block_fsid(struct btrfs_root *root,
 	return ret;
 }

+#define CORRUPT(reason, eb, root, slot)				\
+	printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu,"	\
+	       "root=%llu, slot=%d\n", reason,			\
+	       (unsigned long long)btrfs_header_bytenr(eb),	\
+	       (unsigned long long)root->objectid, slot)
+
+static noinline int check_leaf(struct btrfs_root *root,
+			       struct extent_buffer *leaf)
+{
+	struct btrfs_key key;
+	struct btrfs_key leaf_key;
+	u32 nritems = btrfs_header_nritems(leaf);
+	int slot;
+
+	if (nritems == 0)
+		return 0;
+
+	/* Check the 0 item */
+	if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
+	    BTRFS_LEAF_DATA_SIZE(root)) {
+		CORRUPT("invalid item offset size pair", leaf, root, 0);
+		return -EIO;
+	}
+
+	/*
+	 * Check to make sure each item's keys are in the correct order and
+	 * their offsets make sense. We only have to loop through nritems-1
+	 * because we check the current slot against the next slot, which
+	 * verifies the next slot's offset+size makes sense and that the
+	 * current slot's offset is correct.
+	 */
+	for (slot = 0; slot < nritems - 1; slot++) {
+		btrfs_item_key_to_cpu(leaf, &leaf_key, slot);
+		btrfs_item_key_to_cpu(leaf, &key, slot + 1);
+
+		/* Make sure the keys are in the right order */
+		if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) {
+			CORRUPT("bad key order", leaf, root, slot);
+			return -EIO;
+		}
+
+		/*
+		 * Make sure the offset and ends are right, remember that the
+		 * item data starts at the end of the leaf and grows towards
+		 * the front.
+		 */
+		if (btrfs_item_offset_nr(leaf, slot) !=
+		    btrfs_item_end_nr(leaf, slot + 1)) {
+			CORRUPT("slot offset bad", leaf, root, slot);
+			return -EIO;
+		}
+
+		/*
+		 * Check to make sure that we don't point outside of the leaf,
+		 * just in case all the items are consistent with each other
+		 * but all point outside of the leaf.
+		 */
+		if (btrfs_item_end_nr(leaf, slot) >
+		    BTRFS_LEAF_DATA_SIZE(root)) {
+			CORRUPT("slot end outside of leaf", leaf, root, slot);
+			return -EIO;
+		}
+	}
+
+	return 0;
+}
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
 {
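A standalone sketch of the invariant the new check_leaf() enforces: keys strictly increasing, item data packed back-to-front with no gaps or overlaps, and nothing pointing past the leaf. The types and the 4096-byte leaf size are invented for illustration:

#include <stdint.h>
#include <stdio.h>

#define LEAF_DATA_SIZE 4096

struct item { uint64_t key; uint32_t offset; uint32_t size; };

static int check_leaf(const struct item *it, int nritems)
{
	if (nritems == 0)
		return 0;
	/* slot 0 must end flush against the back of the leaf */
	if (it[0].offset + it[0].size != LEAF_DATA_SIZE)
		return -1;
	for (int slot = 0; slot < nritems - 1; slot++) {
		if (it[slot].key >= it[slot + 1].key)
			return -1;    /* keys out of order */
		if (it[slot].offset != it[slot + 1].offset + it[slot + 1].size)
			return -1;    /* gap or overlap between items */
		if (it[slot].offset + it[slot].size > LEAF_DATA_SIZE)
			return -1;    /* item points past the leaf */
	}
	return 0;
}

int main(void)
{
	struct item good[] = {
		{ 1, 4000, 96 },  /* data ends at 4096 */
		{ 2, 3900, 100 }, /* data ends where slot 0's begins */
	};
	printf("%d\n", check_leaf(good, 2)); /* prints 0: leaf is valid */
	return 0;
}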
@@ -485,8 +562,20 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
 	btrfs_set_buffer_lockdep_class(eb, found_level);

 	ret = csum_tree_block(root, eb, 1);
-	if (ret)
+	if (ret) {
+		ret = -EIO;
+		goto err;
+	}
+
+	/*
+	 * If this is a leaf block and it is corrupt, set the corrupt bit so
+	 * that we don't try and read the other copies of this block, just
+	 * return -EIO.
+	 */
+	if (found_level == 0 && check_leaf(root, eb)) {
+		set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
 		ret = -EIO;
+	}

 	end = min_t(u64, eb->len, PAGE_CACHE_SIZE);
 	end = eb->start + end - 1;
@@ -847,7 +936,6 @@ static const struct address_space_operations btree_aops = {
 	.writepages	= btree_writepages,
 	.releasepage	= btree_releasepage,
 	.invalidatepage	= btree_invalidatepage,
-	.sync_page	= block_sync_page,
 #ifdef CONFIG_MIGRATION
 	.migratepage	= btree_migratepage,
 #endif
@@ -1160,7 +1248,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
 		     root, fs_info, location->objectid);

 	path = btrfs_alloc_path();
-	BUG_ON(!path);
+	if (!path) {
+		kfree(root);
+		return ERR_PTR(-ENOMEM);
+	}
 	ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
 	if (ret == 0) {
 		l = path->nodes[0];
@@ -1184,8 +1275,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
 	root->commit_root = btrfs_root_node(root);
 	BUG_ON(!root->node);
 out:
-	if (location->objectid != BTRFS_TREE_LOG_OBJECTID)
+	if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
 		root->ref_cows = 1;
+		btrfs_check_and_init_root_item(&root->root_item);
+	}

 	return root;
 }
@@ -1331,82 +1424,6 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
 }

 /*
- * this unplugs every device on the box, and it is only used when page
- * is null
- */
-static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
-{
-	struct btrfs_device *device;
-	struct btrfs_fs_info *info;
-
-	info = (struct btrfs_fs_info *)bdi->unplug_io_data;
-	list_for_each_entry(device, &info->fs_devices->devices, dev_list) {
-		if (!device->bdev)
-			continue;
-
-		bdi = blk_get_backing_dev_info(device->bdev);
-		if (bdi->unplug_io_fn)
-			bdi->unplug_io_fn(bdi, page);
-	}
-}
-
-static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
-{
-	struct inode *inode;
-	struct extent_map_tree *em_tree;
-	struct extent_map *em;
-	struct address_space *mapping;
-	u64 offset;
-
-	/* the generic O_DIRECT read code does this */
-	if (1 || !page) {
-		__unplug_io_fn(bdi, page);
-		return;
-	}
-
-	/*
-	 * page->mapping may change at any time. Get a consistent copy
-	 * and use that for everything below
-	 */
-	smp_mb();
-	mapping = page->mapping;
-	if (!mapping)
-		return;
-
-	inode = mapping->host;
-
-	/*
-	 * don't do the expensive searching for a small number of
-	 * devices
-	 */
-	if (BTRFS_I(inode)->root->fs_info->fs_devices->open_devices <= 2) {
-		__unplug_io_fn(bdi, page);
-		return;
-	}
-
-	offset = page_offset(page);
-
-	em_tree = &BTRFS_I(inode)->extent_tree;
-	read_lock(&em_tree->lock);
-	em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
-	read_unlock(&em_tree->lock);
-	if (!em) {
-		__unplug_io_fn(bdi, page);
-		return;
-	}
-
-	if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
-		free_extent_map(em);
-		__unplug_io_fn(bdi, page);
-		return;
-	}
-	offset = offset - em->start;
-	btrfs_unplug_page(&BTRFS_I(inode)->root->fs_info->mapping_tree,
-			  em->block_start + offset, page);
-	free_extent_map(em);
-}
-
-/*
  * If this fails, caller must call bdi_destroy() to get rid of the
  * bdi again.
  */
@@ -1420,8 +1437,6 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
 		return err;

 	bdi->ra_pages	= default_backing_dev_info.ra_pages;
-	bdi->unplug_io_fn	= btrfs_unplug_io_fn;
-	bdi->unplug_io_data	= info;
 	bdi->congested_fn	= btrfs_congested_fn;
 	bdi->congested_data	= info;
 	return 0;
@@ -1632,6 +1647,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 		goto fail_bdi;
 	}

+	fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS;
+
 	INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
 	INIT_LIST_HEAD(&fs_info->trans_list);
 	INIT_LIST_HEAD(&fs_info->dead_roots);
@@ -1762,6 +1779,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,

 	btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);

+	/*
+	 * In the long term, we'll store the compression type in the super
+	 * block, and it'll be used for per file compression control.
+	 */
+	fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
+
 	ret = btrfs_parse_options(tree_root, options);
 	if (ret) {
 		err = ret;
@@ -1967,6 +1990,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	fs_info->metadata_alloc_profile = (u64)-1;
 	fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;

+	ret = btrfs_init_space_info(fs_info);
+	if (ret) {
+		printk(KERN_ERR "Failed to initialize space info: %d\n", ret);
+		goto fail_block_groups;
+	}
+
 	ret = btrfs_read_block_groups(extent_root);
 	if (ret) {
 		printk(KERN_ERR "Failed to read block groups: %d\n", ret);
@@ -2058,9 +2087,14 @@ struct btrfs_root *open_ctree(struct super_block *sb,

 	if (!(sb->s_flags & MS_RDONLY)) {
 		down_read(&fs_info->cleanup_work_sem);
-		btrfs_orphan_cleanup(fs_info->fs_root);
-		btrfs_orphan_cleanup(fs_info->tree_root);
+		err = btrfs_orphan_cleanup(fs_info->fs_root);
+		if (!err)
+			err = btrfs_orphan_cleanup(fs_info->tree_root);
 		up_read(&fs_info->cleanup_work_sem);
+		if (err) {
+			close_ctree(tree_root);
+			return ERR_PTR(err);
+		}
 	}

 	return tree_root;
@@ -2435,8 +2469,12 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)

 		root_objectid = gang[ret - 1]->root_key.objectid + 1;
 		for (i = 0; i < ret; i++) {
+			int err;
+
 			root_objectid = gang[i]->root_key.objectid;
-			btrfs_orphan_cleanup(gang[i]);
+			err = btrfs_orphan_cleanup(gang[i]);
+			if (err)
+				return err;
 		}
 		root_objectid++;
 	}
@@ -2947,7 +2985,10 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
 			break;

 		/* opt_discard */
-		ret = btrfs_error_discard_extent(root, start, end + 1 - start);
+		if (btrfs_test_opt(root, DISCARD))
+			ret = btrfs_error_discard_extent(root, start,
+							 end + 1 - start,
+							 NULL);

 		clear_extent_dirty(unpin, start, end, GFP_NOFS);
 		btrfs_error_unpin_extent_range(root, start, end);
@@ -3016,7 +3057,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
 		btrfs_destroy_pinned_extent(root,
 					    root->fs_info->pinned_extents);

-		t->use_count = 0;
+		atomic_set(&t->use_count, 0);
 		list_del_init(&t->list);
 		memset(t, 0, sizeof(*t));
 		kmem_cache_free(btrfs_transaction_cachep, t);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 7b3089b5c2df..31f33ba56fe8 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -33,11 +33,28 @@
 #include "locking.h"
 #include "free-space-cache.h"

+/*
+ * Control flags for do_chunk_alloc's force field.
+ *
+ * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
+ * if we really need one.
+ *
+ * CHUNK_ALLOC_FORCE means it must try to allocate one.
+ *
+ * CHUNK_ALLOC_LIMITED means to only try and allocate one
+ * if we have very few chunks already allocated. This is
+ * used as part of the clustering code to help make sure
+ * we have a good pool of storage to cluster in, without
+ * filling the FS with empty chunks.
+ */
+enum {
+	CHUNK_ALLOC_NO_FORCE = 0,
+	CHUNK_ALLOC_FORCE = 1,
+	CHUNK_ALLOC_LIMITED = 2,
+};
+
 static int update_block_group(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root,
 			      u64 bytenr, u64 num_bytes, int alloc);
-static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
-				 u64 num_bytes, int reserve, int sinfo);
 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root,
 			       u64 bytenr, u64 num_bytes, u64 parent,
@@ -442,7 +459,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 	 * allocate blocks for the tree root we can't do the fast caching since
 	 * we likely hold important locks.
 	 */
-	if (!trans->transaction->in_commit &&
+	if (trans && (!trans->transaction->in_commit) &&
 	    (root && root != root->fs_info->tree_root)) {
 		spin_lock(&cache->lock);
 		if (cache->cached != BTRFS_CACHE_NO) {
@@ -471,7 +488,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 	if (load_cache_only)
 		return 0;

-	caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
+	caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
 	BUG_ON(!caching_ctl);

 	INIT_LIST_HEAD(&caching_ctl->list);
@@ -1740,39 +1757,45 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
 	return ret;
 }

-static void btrfs_issue_discard(struct block_device *bdev,
-				u64 start, u64 len)
+static int btrfs_issue_discard(struct block_device *bdev,
+			       u64 start, u64 len)
 {
-	blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 0);
+	return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0);
 }

 static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
-				u64 num_bytes)
+				u64 num_bytes, u64 *actual_bytes)
 {
 	int ret;
-	u64 map_length = num_bytes;
+	u64 discarded_bytes = 0;
 	struct btrfs_multi_bio *multi = NULL;

-	if (!btrfs_test_opt(root, DISCARD))
-		return 0;

 	/* Tell the block device(s) that the sectors can be discarded */
-	ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
-			      bytenr, &map_length, &multi, 0);
+	ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD,
+			      bytenr, &num_bytes, &multi, 0);
 	if (!ret) {
 		struct btrfs_bio_stripe *stripe = multi->stripes;
 		int i;

-		if (map_length > num_bytes)
-			map_length = num_bytes;

 		for (i = 0; i < multi->num_stripes; i++, stripe++) {
-			btrfs_issue_discard(stripe->dev->bdev,
-					    stripe->physical,
-					    map_length);
+			ret = btrfs_issue_discard(stripe->dev->bdev,
+						  stripe->physical,
+						  stripe->length);
+			if (!ret)
+				discarded_bytes += stripe->length;
+			else if (ret != -EOPNOTSUPP)
+				break;
 		}
 		kfree(multi);
 	}
+	if (discarded_bytes && ret == -EOPNOTSUPP)
+		ret = 0;
+
+	if (actual_bytes)
+		*actual_bytes = discarded_bytes;
+

 	return ret;
 }
@@ -3015,7 +3038,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 	found->bytes_readonly = 0;
 	found->bytes_may_use = 0;
 	found->full = 0;
-	found->force_alloc = 0;
+	found->force_alloc = CHUNK_ALLOC_NO_FORCE;
+	found->chunk_alloc = 0;
 	*space_info = found;
 	list_add_rcu(&found->list, &info->space_info);
 	atomic_set(&found->caching_threads, 0);
@@ -3146,7 +3170,7 @@ again:
 	if (!data_sinfo->full && alloc_chunk) {
 		u64 alloc_target;

-		data_sinfo->force_alloc = 1;
+		data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
 		spin_unlock(&data_sinfo->lock);
 alloc:
 		alloc_target = btrfs_get_alloc_profile(root, 1);
@@ -3156,7 +3180,8 @@ alloc:

 		ret = do_chunk_alloc(trans, root->fs_info->extent_root,
 				     bytes + 2 * 1024 * 1024,
-				     alloc_target, 0);
+				     alloc_target,
+				     CHUNK_ALLOC_NO_FORCE);
 		btrfs_end_transaction(trans, root);
 		if (ret < 0) {
 			if (ret != -ENOSPC)
@@ -3235,31 +3260,56 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
 	rcu_read_lock();
 	list_for_each_entry_rcu(found, head, list) {
 		if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
-			found->force_alloc = 1;
+			found->force_alloc = CHUNK_ALLOC_FORCE;
 	}
 	rcu_read_unlock();
 }

 static int should_alloc_chunk(struct btrfs_root *root,
-			      struct btrfs_space_info *sinfo, u64 alloc_bytes)
+			      struct btrfs_space_info *sinfo, u64 alloc_bytes,
+			      int force)
 {
 	u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
+	u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
 	u64 thresh;

-	if (sinfo->bytes_used + sinfo->bytes_reserved +
-	    alloc_bytes + 256 * 1024 * 1024 < num_bytes)
+	if (force == CHUNK_ALLOC_FORCE)
+		return 1;
+
+	/*
+	 * in limited mode, we want to have some free space up to
+	 * about 1% of the FS size.
+	 */
+	if (force == CHUNK_ALLOC_LIMITED) {
+		thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+		thresh = max_t(u64, 64 * 1024 * 1024,
+			       div_factor_fine(thresh, 1));
+
+		if (num_bytes - num_allocated < thresh)
+			return 1;
+	}
+
+	/*
+	 * we have two similar checks here, one based on a percentage
+	 * and one based on a hard number of 256MB. The idea is that if
+	 * we have a good amount of free room, don't allocate a chunk.
+	 * A good amount means less than 80% of the chunks we have
+	 * allocated are utilized, or more than 256MB is free.
+	 */
+	if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes)
 		return 0;

-	if (sinfo->bytes_used + sinfo->bytes_reserved +
-	    alloc_bytes < div_factor(num_bytes, 8))
+	if (num_allocated + alloc_bytes < div_factor(num_bytes, 8))
 		return 0;

 	thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+
+	/* 256MB or 5% of the FS */
 	thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));

 	if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
 		return 0;
-
 	return 1;
 }

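The rewritten should_alloc_chunk() above combines an absolute headroom test with a utilization fraction. A toy model of that policy in plain C; the thresholds are simplified and the limited mode and div_factor helpers are left out:

#include <stdint.h>
#include <stdio.h>

#define MB (1024ULL * 1024ULL)

static int should_alloc_chunk(uint64_t total, uint64_t allocated,
			      uint64_t request)
{
	/* plenty of absolute headroom left: don't allocate */
	if (allocated + request + 256 * MB < total)
		return 0;
	/* less than 80% of the existing space would be used: don't allocate */
	if (allocated + request < total * 8 / 10)
		return 0;
	return 1;
}

int main(void)
{
	printf("%d\n", should_alloc_chunk(10240 * MB, 1024 * MB, 8 * MB)); /* 0 */
	printf("%d\n", should_alloc_chunk(1024 * MB, 900 * MB, 8 * MB));   /* 1 */
	return 0;
}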
@@ -3269,10 +3319,9 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 {
 	struct btrfs_space_info *space_info;
 	struct btrfs_fs_info *fs_info = extent_root->fs_info;
+	int wait_for_alloc = 0;
 	int ret = 0;

-	mutex_lock(&fs_info->chunk_mutex);
-
 	flags = btrfs_reduce_alloc_profile(extent_root, flags);

 	space_info = __find_space_info(extent_root->fs_info, flags);
@@ -3283,21 +3332,40 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 	}
 	BUG_ON(!space_info);

+again:
 	spin_lock(&space_info->lock);
 	if (space_info->force_alloc)
-		force = 1;
+		force = space_info->force_alloc;
 	if (space_info->full) {
 		spin_unlock(&space_info->lock);
-		goto out;
+		return 0;
 	}

-	if (!force && !should_alloc_chunk(extent_root, space_info,
-					  alloc_bytes)) {
+	if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) {
 		spin_unlock(&space_info->lock);
-		goto out;
+		return 0;
+	} else if (space_info->chunk_alloc) {
+		wait_for_alloc = 1;
+	} else {
+		space_info->chunk_alloc = 1;
 	}
+
 	spin_unlock(&space_info->lock);

+	mutex_lock(&fs_info->chunk_mutex);
+
+	/*
+	 * The chunk_mutex is held throughout the entirety of a chunk
+	 * allocation, so once we've acquired the chunk_mutex we know that the
+	 * other guy is done and we need to recheck and see if we should
+	 * allocate.
+	 */
+	if (wait_for_alloc) {
+		mutex_unlock(&fs_info->chunk_mutex);
+		wait_for_alloc = 0;
+		goto again;
+	}
+
 	/*
 	 * If we have mixed data/metadata chunks we want to make sure we keep
 	 * allocating mixed chunks instead of individual chunks.
@@ -3323,9 +3391,10 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 		space_info->full = 1;
 	else
 		ret = 1;
-	space_info->force_alloc = 0;
+
+	space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
+	space_info->chunk_alloc = 0;
 	spin_unlock(&space_info->lock);
-out:
 	mutex_unlock(&extent_root->fs_info->chunk_mutex);
 	return ret;
 }
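The do_chunk_alloc() rework above lets exactly one thread perform the allocation while latecomers block on chunk_mutex and then loop back to recheck, instead of allocating a second chunk. A userspace sketch with pthreads; the recheck condition (chunks > 0) is a deliberate simplification of the should_alloc_chunk() recheck:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER; /* ~spinlock */
static pthread_mutex_t chunk_mutex = PTHREAD_MUTEX_INITIALIZER;
static int chunk_alloc_in_progress;
static int chunks;

static void alloc_chunk(void)
{
	int wait_for_alloc;
again:
	wait_for_alloc = 0;
	pthread_mutex_lock(&state_lock);
	if (chunks > 0) {               /* recheck: someone already did it */
		pthread_mutex_unlock(&state_lock);
		return;
	}
	if (chunk_alloc_in_progress)
		wait_for_alloc = 1;     /* someone else is allocating */
	else
		chunk_alloc_in_progress = 1;
	pthread_mutex_unlock(&state_lock);

	pthread_mutex_lock(&chunk_mutex);
	if (wait_for_alloc) {
		/* the other allocator is (or will be) done; recheck */
		pthread_mutex_unlock(&chunk_mutex);
		goto again;
	}
	chunks++;                       /* the actual (slow) allocation */
	pthread_mutex_unlock(&chunk_mutex);

	pthread_mutex_lock(&state_lock);
	chunk_alloc_in_progress = 0;
	pthread_mutex_unlock(&state_lock);
}

static void *worker(void *arg)
{
	(void)arg;
	alloc_chunk();
	return NULL;
}

int main(void)
{
	pthread_t t[4];
	for (int i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, worker, NULL);
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	printf("chunks=%d\n", chunks);  /* 1: only one allocation happened */
	return 0;
}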
@@ -3996,6 +4065,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 	struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
 	u64 to_reserve;
 	int nr_extents;
+	int reserved_extents;
 	int ret;

 	if (btrfs_transaction_in_commit(root->fs_info))
@@ -4003,25 +4073,24 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)

 	num_bytes = ALIGN(num_bytes, root->sectorsize);

-	spin_lock(&BTRFS_I(inode)->accounting_lock);
 	nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
-	if (nr_extents > BTRFS_I(inode)->reserved_extents) {
-		nr_extents -= BTRFS_I(inode)->reserved_extents;
+	reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
+
+	if (nr_extents > reserved_extents) {
+		nr_extents -= reserved_extents;
 		to_reserve = calc_trans_metadata_size(root, nr_extents);
 	} else {
 		nr_extents = 0;
 		to_reserve = 0;
 	}
-	spin_unlock(&BTRFS_I(inode)->accounting_lock);
+
 	to_reserve += calc_csum_metadata_size(inode, num_bytes);
 	ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
 	if (ret)
 		return ret;

-	spin_lock(&BTRFS_I(inode)->accounting_lock);
-	BTRFS_I(inode)->reserved_extents += nr_extents;
+	atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents);
 	atomic_inc(&BTRFS_I(inode)->outstanding_extents);
-	spin_unlock(&BTRFS_I(inode)->accounting_lock);

 	block_rsv_add_bytes(block_rsv, to_reserve, 1);

@@ -4036,20 +4105,30 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	u64 to_free;
 	int nr_extents;
+	int reserved_extents;

 	num_bytes = ALIGN(num_bytes, root->sectorsize);
 	atomic_dec(&BTRFS_I(inode)->outstanding_extents);
 	WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0);

-	spin_lock(&BTRFS_I(inode)->accounting_lock);
-	nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
-	if (nr_extents < BTRFS_I(inode)->reserved_extents) {
-		nr_extents = BTRFS_I(inode)->reserved_extents - nr_extents;
-		BTRFS_I(inode)->reserved_extents -= nr_extents;
-	} else {
-		nr_extents = 0;
-	}
-	spin_unlock(&BTRFS_I(inode)->accounting_lock);
+	reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
+	do {
+		int old, new;
+
+		nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
+		if (nr_extents >= reserved_extents) {
+			nr_extents = 0;
+			break;
+		}
+		old = reserved_extents;
+		nr_extents = reserved_extents - nr_extents;
+		new = reserved_extents - nr_extents;
+		old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents,
+				     reserved_extents, new);
+		if (likely(old == reserved_extents))
+			break;
+		reserved_extents = old;
+	} while (1);

 	to_free = calc_csum_metadata_size(inode, num_bytes);
 	if (nr_extents > 0)
4223 * update size of reserved extents. this function may return -EAGAIN 4302 * update size of reserved extents. this function may return -EAGAIN
4224 * if 'reserve' is true or 'sinfo' is false. 4303 * if 'reserve' is true or 'sinfo' is false.
4225 */ 4304 */
4226static int update_reserved_bytes(struct btrfs_block_group_cache *cache, 4305int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
4227 u64 num_bytes, int reserve, int sinfo) 4306 u64 num_bytes, int reserve, int sinfo)
4228{ 4307{
4229 int ret = 0; 4308 int ret = 0;
4230 if (sinfo) { 4309 if (sinfo) {
@@ -4363,7 +4442,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
4363 if (ret) 4442 if (ret)
4364 break; 4443 break;
4365 4444
4366 ret = btrfs_discard_extent(root, start, end + 1 - start); 4445 if (btrfs_test_opt(root, DISCARD))
4446 ret = btrfs_discard_extent(root, start,
4447 end + 1 - start, NULL);
4367 4448
4368 clear_extent_dirty(unpin, start, end, GFP_NOFS); 4449 clear_extent_dirty(unpin, start, end, GFP_NOFS);
4369 unpin_extent_range(root, start, end); 4450 unpin_extent_range(root, start, end);
@@ -4704,10 +4785,10 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4704 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); 4785 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
4705 4786
4706 btrfs_add_free_space(cache, buf->start, buf->len); 4787 btrfs_add_free_space(cache, buf->start, buf->len);
4707 ret = update_reserved_bytes(cache, buf->len, 0, 0); 4788 ret = btrfs_update_reserved_bytes(cache, buf->len, 0, 0);
4708 if (ret == -EAGAIN) { 4789 if (ret == -EAGAIN) {
4709 /* block group became read-only */ 4790 /* block group became read-only */
4710 update_reserved_bytes(cache, buf->len, 0, 1); 4791 btrfs_update_reserved_bytes(cache, buf->len, 0, 1);
4711 goto out; 4792 goto out;
4712 } 4793 }
4713 4794
@@ -4744,6 +4825,11 @@ pin:
4744 } 4825 }
4745 } 4826 }
4746out: 4827out:
4828 /*
4829 * Deleting the buffer, clear the corrupt flag since it doesn't matter
4830 * anymore.
4831 */
4832 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
4747 btrfs_put_block_group(cache); 4833 btrfs_put_block_group(cache);
4748} 4834}
4749 4835
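
EXTENT_BUFFER_CORRUPT (added to extent_io.h further down in this series) is presumably set by a read-time sanity check elsewhere in the patch; clearing it here keeps a stale verdict from outliving a block that is being freed anyway. The intended pairing would look roughly like this (illustrative sketch, leaf_is_bogus is a made-up name):

    /* somewhere in the read/validation path: */
    if (leaf_is_bogus(eb))
            set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);

    /* on free, as above, the verdict no longer matters: */
    clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
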
@@ -5191,7 +5277,7 @@ checks:
5191 search_start - offset); 5277 search_start - offset);
5192 BUG_ON(offset > search_start); 5278 BUG_ON(offset > search_start);
5193 5279
5194 ret = update_reserved_bytes(block_group, num_bytes, 1, 5280 ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1,
5195 (data & BTRFS_BLOCK_GROUP_DATA)); 5281 (data & BTRFS_BLOCK_GROUP_DATA));
5196 if (ret == -EAGAIN) { 5282 if (ret == -EAGAIN) {
5197 btrfs_add_free_space(block_group, offset, num_bytes); 5283 btrfs_add_free_space(block_group, offset, num_bytes);
@@ -5282,11 +5368,13 @@ loop:
5282 5368
5283 if (allowed_chunk_alloc) { 5369 if (allowed_chunk_alloc) {
5284 ret = do_chunk_alloc(trans, root, num_bytes + 5370 ret = do_chunk_alloc(trans, root, num_bytes +
5285 2 * 1024 * 1024, data, 1); 5371 2 * 1024 * 1024, data,
5372 CHUNK_ALLOC_LIMITED);
5286 allowed_chunk_alloc = 0; 5373 allowed_chunk_alloc = 0;
5287 done_chunk_alloc = 1; 5374 done_chunk_alloc = 1;
5288 } else if (!done_chunk_alloc) { 5375 } else if (!done_chunk_alloc &&
5289 space_info->force_alloc = 1; 5376 space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) {
5377 space_info->force_alloc = CHUNK_ALLOC_LIMITED;
5290 } 5378 }
5291 5379
5292 if (loop < LOOP_NO_EMPTY_SIZE) { 5380 if (loop < LOOP_NO_EMPTY_SIZE) {
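
The bare 0/1 third argument of do_chunk_alloc() is replaced throughout this hunk and the following ones by named force levels. Their definitions are not part of these hunks; judging from the call sites they are presumably along these lines (the exact values and ordering are an assumption):

    /* increasing willingness to allocate a new chunk */
    enum {
            CHUNK_ALLOC_NO_FORCE,   /* only allocate if we are about to run out */
            CHUNK_ALLOC_LIMITED,    /* allow a limited over-allocation */
            CHUNK_ALLOC_FORCE,      /* allocate unconditionally */
    };
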
@@ -5372,7 +5460,8 @@ again:
5372 */ 5460 */
5373 if (empty_size || root->ref_cows) 5461 if (empty_size || root->ref_cows)
5374 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 5462 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
5375 num_bytes + 2 * 1024 * 1024, data, 0); 5463 num_bytes + 2 * 1024 * 1024, data,
5464 CHUNK_ALLOC_NO_FORCE);
5376 5465
5377 WARN_ON(num_bytes < root->sectorsize); 5466 WARN_ON(num_bytes < root->sectorsize);
5378 ret = find_free_extent(trans, root, num_bytes, empty_size, 5467 ret = find_free_extent(trans, root, num_bytes, empty_size,
@@ -5384,7 +5473,7 @@ again:
5384 num_bytes = num_bytes & ~(root->sectorsize - 1); 5473 num_bytes = num_bytes & ~(root->sectorsize - 1);
5385 num_bytes = max(num_bytes, min_alloc_size); 5474 num_bytes = max(num_bytes, min_alloc_size);
5386 do_chunk_alloc(trans, root->fs_info->extent_root, 5475 do_chunk_alloc(trans, root->fs_info->extent_root,
5387 num_bytes, data, 1); 5476 num_bytes, data, CHUNK_ALLOC_FORCE);
5388 goto again; 5477 goto again;
5389 } 5478 }
5390 if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) { 5479 if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
@@ -5397,6 +5486,8 @@ again:
5397 dump_space_info(sinfo, num_bytes, 1); 5486 dump_space_info(sinfo, num_bytes, 1);
5398 } 5487 }
5399 5488
5489 trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset);
5490
5400 return ret; 5491 return ret;
5401} 5492}
5402 5493
@@ -5412,12 +5503,15 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
5412 return -ENOSPC; 5503 return -ENOSPC;
5413 } 5504 }
5414 5505
5415 ret = btrfs_discard_extent(root, start, len); 5506 if (btrfs_test_opt(root, DISCARD))
5507 ret = btrfs_discard_extent(root, start, len, NULL);
5416 5508
5417 btrfs_add_free_space(cache, start, len); 5509 btrfs_add_free_space(cache, start, len);
5418 update_reserved_bytes(cache, len, 0, 1); 5510 btrfs_update_reserved_bytes(cache, len, 0, 1);
5419 btrfs_put_block_group(cache); 5511 btrfs_put_block_group(cache);
5420 5512
5513 trace_btrfs_reserved_extent_free(root, start, len);
5514
5421 return ret; 5515 return ret;
5422} 5516}
5423 5517
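
trace_btrfs_reserved_extent_alloc() and trace_btrfs_reserved_extent_free() are new tracepoints whose definitions live in a tracing header rather than in this file. As a sketch of what such a definition typically looks like (the field layout here is an assumption, not the actual header):

    TRACE_EVENT(btrfs_reserved_extent_free,
            TP_PROTO(struct btrfs_root *root, u64 start, u64 len),
            TP_ARGS(root, start, len),
            TP_STRUCT__entry(
                    __field(u64, root_objectid)
                    __field(u64, start)
                    __field(u64, len)
            ),
            TP_fast_assign(
                    __entry->root_objectid = root->root_key.objectid;
                    __entry->start = start;
                    __entry->len = len;
            ),
            TP_printk("root = %llu, start = %llu, len = %llu",
                      __entry->root_objectid, __entry->start, __entry->len)
    );
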
@@ -5444,7 +5538,8 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
5444 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type); 5538 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
5445 5539
5446 path = btrfs_alloc_path(); 5540 path = btrfs_alloc_path();
5447 BUG_ON(!path); 5541 if (!path)
5542 return -ENOMEM;
5448 5543
5449 path->leave_spinning = 1; 5544 path->leave_spinning = 1;
5450 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 5545 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
@@ -5614,7 +5709,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
5614 put_caching_control(caching_ctl); 5709 put_caching_control(caching_ctl);
5615 } 5710 }
5616 5711
5617 ret = update_reserved_bytes(block_group, ins->offset, 1, 1); 5712 ret = btrfs_update_reserved_bytes(block_group, ins->offset, 1, 1);
5618 BUG_ON(ret); 5713 BUG_ON(ret);
5619 btrfs_put_block_group(block_group); 5714 btrfs_put_block_group(block_group);
5620 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 5715 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
@@ -6047,6 +6142,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
6047 if (reada && level == 1) 6142 if (reada && level == 1)
6048 reada_walk_down(trans, root, wc, path); 6143 reada_walk_down(trans, root, wc, path);
6049 next = read_tree_block(root, bytenr, blocksize, generation); 6144 next = read_tree_block(root, bytenr, blocksize, generation);
6145 if (!next)
6146 return -EIO;
6050 btrfs_tree_lock(next); 6147 btrfs_tree_lock(next);
6051 btrfs_set_lock_blocking(next); 6148 btrfs_set_lock_blocking(next);
6052 } 6149 }
@@ -6438,10 +6535,14 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
6438 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); 6535 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
6439 6536
6440 path = btrfs_alloc_path(); 6537 path = btrfs_alloc_path();
6441 BUG_ON(!path); 6538 if (!path)
6539 return -ENOMEM;
6442 6540
6443 wc = kzalloc(sizeof(*wc), GFP_NOFS); 6541 wc = kzalloc(sizeof(*wc), GFP_NOFS);
6444 BUG_ON(!wc); 6542 if (!wc) {
6543 btrfs_free_path(path);
6544 return -ENOMEM;
6545 }
6445 6546
6446 btrfs_assert_tree_locked(parent); 6547 btrfs_assert_tree_locked(parent);
6447 parent_level = btrfs_header_level(parent); 6548 parent_level = btrfs_header_level(parent);
@@ -6899,7 +7000,11 @@ static noinline int get_new_locations(struct inode *reloc_inode,
6899 } 7000 }
6900 7001
6901 path = btrfs_alloc_path(); 7002 path = btrfs_alloc_path();
6902 BUG_ON(!path); 7003 if (!path) {
7004 if (exts != *extents)
7005 kfree(exts);
7006 return -ENOMEM;
7007 }
6903 7008
6904 cur_pos = extent_key->objectid - offset; 7009 cur_pos = extent_key->objectid - offset;
6905 last_byte = extent_key->objectid + extent_key->offset; 7010 last_byte = extent_key->objectid + extent_key->offset;
@@ -6941,6 +7046,10 @@ static noinline int get_new_locations(struct inode *reloc_inode,
6941 struct disk_extent *old = exts; 7046 struct disk_extent *old = exts;
6942 max *= 2; 7047 max *= 2;
6943 exts = kzalloc(sizeof(*exts) * max, GFP_NOFS); 7048 exts = kzalloc(sizeof(*exts) * max, GFP_NOFS);
7049 if (!exts) {
7050 ret = -ENOMEM;
7051 goto out;
7052 }
6944 memcpy(exts, old, sizeof(*exts) * nr); 7053 memcpy(exts, old, sizeof(*exts) * nr);
6945 if (old != *extents) 7054 if (old != *extents)
6946 kfree(old); 7055 kfree(old);
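
The array growth above is open-coded rather than using krealloc() for a reason visible in the surrounding lines: exts may still alias the caller-supplied *extents buffer, which this function did not allocate and must not pass to krealloc() or kfree(). Hence the pattern:

    /*
     * Why not krealloc()?  The initial array may be the caller's own
     * buffer (exts == *extents), so growth has to be: allocate new,
     * memcpy() the old contents, and free the old copy only if we
     * allocated it ourselves (old != *extents).
     */
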
@@ -7423,7 +7532,8 @@ static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans,
7423 int ret; 7532 int ret;
7424 7533
7425 new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS); 7534 new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS);
7426 BUG_ON(!new_extent); 7535 if (!new_extent)
7536 return -ENOMEM;
7427 7537
7428 ref = btrfs_lookup_leaf_ref(root, leaf->start); 7538 ref = btrfs_lookup_leaf_ref(root, leaf->start);
7429 BUG_ON(!ref); 7539 BUG_ON(!ref);
@@ -7609,7 +7719,8 @@ int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
7609 7719
7610 reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location); 7720 reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
7611 BUG_ON(!reloc_root); 7721 BUG_ON(!reloc_root);
7612 btrfs_orphan_cleanup(reloc_root); 7722 ret = btrfs_orphan_cleanup(reloc_root);
7723 BUG_ON(ret);
7613 return 0; 7724 return 0;
7614} 7725}
7615 7726
@@ -7627,7 +7738,8 @@ static noinline int init_reloc_tree(struct btrfs_trans_handle *trans,
7627 return 0; 7738 return 0;
7628 7739
7629 root_item = kmalloc(sizeof(*root_item), GFP_NOFS); 7740 root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
7630 BUG_ON(!root_item); 7741 if (!root_item)
7742 return -ENOMEM;
7631 7743
7632 ret = btrfs_copy_root(trans, root, root->commit_root, 7744 ret = btrfs_copy_root(trans, root, root->commit_root,
7633 &eb, BTRFS_TREE_RELOC_OBJECTID); 7745 &eb, BTRFS_TREE_RELOC_OBJECTID);
@@ -7653,7 +7765,7 @@ static noinline int init_reloc_tree(struct btrfs_trans_handle *trans,
7653 7765
7654 reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, 7766 reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
7655 &root_key); 7767 &root_key);
7656 BUG_ON(!reloc_root); 7768 BUG_ON(IS_ERR(reloc_root));
7657 reloc_root->last_trans = trans->transid; 7769 reloc_root->last_trans = trans->transid;
7658 reloc_root->commit_root = NULL; 7770 reloc_root->commit_root = NULL;
7659 reloc_root->ref_tree = &root->fs_info->reloc_ref_tree; 7771 reloc_root->ref_tree = &root->fs_info->reloc_ref_tree;
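
btrfs_read_fs_root_no_radix() reports failure through the ERR_PTR convention, so the old BUG_ON(!reloc_root) could never fire; the fix checks the pointer the way ERR_PTR-returning callees must be checked. The general idiom (a sketch, with a hypothetical callee):

    struct foo *p = some_err_ptr_returning_call();

    if (IS_ERR(p))                  /* p encodes a -errno, not a valid pointer */
            return PTR_ERR(p);      /* recover the error code */
    /* p is a valid pointer from here on; !p can never be true */
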
@@ -7906,6 +8018,10 @@ static noinline int relocate_one_extent(struct btrfs_root *extent_root,
7906 8018
7907 eb = read_tree_block(found_root, block_start, 8019 eb = read_tree_block(found_root, block_start,
7908 block_size, 0); 8020 block_size, 0);
8021 if (!eb) {
8022 ret = -EIO;
8023 goto out;
8024 }
7909 btrfs_tree_lock(eb); 8025 btrfs_tree_lock(eb);
7910 BUG_ON(level != btrfs_header_level(eb)); 8026 BUG_ON(level != btrfs_header_level(eb));
7911 8027
@@ -8061,13 +8177,15 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
8061 8177
8062 alloc_flags = update_block_group_flags(root, cache->flags); 8178 alloc_flags = update_block_group_flags(root, cache->flags);
8063 if (alloc_flags != cache->flags) 8179 if (alloc_flags != cache->flags)
8064 do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); 8180 do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
8181 CHUNK_ALLOC_FORCE);
8065 8182
8066 ret = set_block_group_ro(cache); 8183 ret = set_block_group_ro(cache);
8067 if (!ret) 8184 if (!ret)
8068 goto out; 8185 goto out;
8069 alloc_flags = get_alloc_profile(root, cache->space_info->flags); 8186 alloc_flags = get_alloc_profile(root, cache->space_info->flags);
8070 ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); 8187 ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
8188 CHUNK_ALLOC_FORCE);
8071 if (ret < 0) 8189 if (ret < 0)
8072 goto out; 8190 goto out;
8073 ret = set_block_group_ro(cache); 8191 ret = set_block_group_ro(cache);
@@ -8080,7 +8198,8 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
8080 struct btrfs_root *root, u64 type) 8198 struct btrfs_root *root, u64 type)
8081{ 8199{
8082 u64 alloc_flags = get_alloc_profile(root, type); 8200 u64 alloc_flags = get_alloc_profile(root, type);
8083 return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); 8201 return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
8202 CHUNK_ALLOC_FORCE);
8084} 8203}
8085 8204
8086/* 8205/*
@@ -8621,6 +8740,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8621 BUG_ON(!block_group); 8740 BUG_ON(!block_group);
8622 BUG_ON(!block_group->ro); 8741 BUG_ON(!block_group->ro);
8623 8742
8743 /*
8744 * Free the reserved super bytes from this block group before
 8745 * removing it.
8746 */
8747 free_excluded_extents(root, block_group);
8748
8624 memcpy(&key, &block_group->key, sizeof(key)); 8749 memcpy(&key, &block_group->key, sizeof(key));
8625 if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP | 8750 if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
8626 BTRFS_BLOCK_GROUP_RAID1 | 8751 BTRFS_BLOCK_GROUP_RAID1 |
@@ -8724,13 +8849,84 @@ out:
8724 return ret; 8849 return ret;
8725} 8850}
8726 8851
8852int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
8853{
8854 struct btrfs_space_info *space_info;
8855 int ret;
8856
8857 ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM, 0, 0,
8858 &space_info);
8859 if (ret)
8860 return ret;
8861
8862 ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA, 0, 0,
8863 &space_info);
8864 if (ret)
8865 return ret;
8866
8867 ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA, 0, 0,
8868 &space_info);
8869 if (ret)
8870 return ret;
8871
8872 return ret;
8873}
8874
8727int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) 8875int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
8728{ 8876{
8729 return unpin_extent_range(root, start, end); 8877 return unpin_extent_range(root, start, end);
8730} 8878}
8731 8879
8732int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, 8880int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
8733 u64 num_bytes) 8881 u64 num_bytes, u64 *actual_bytes)
8882{
8883 return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes);
8884}
8885
8886int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
8734{ 8887{
8735 return btrfs_discard_extent(root, bytenr, num_bytes); 8888 struct btrfs_fs_info *fs_info = root->fs_info;
8889 struct btrfs_block_group_cache *cache = NULL;
8890 u64 group_trimmed;
8891 u64 start;
8892 u64 end;
8893 u64 trimmed = 0;
8894 int ret = 0;
8895
8896 cache = btrfs_lookup_block_group(fs_info, range->start);
8897
8898 while (cache) {
8899 if (cache->key.objectid >= (range->start + range->len)) {
8900 btrfs_put_block_group(cache);
8901 break;
8902 }
8903
8904 start = max(range->start, cache->key.objectid);
8905 end = min(range->start + range->len,
8906 cache->key.objectid + cache->key.offset);
8907
8908 if (end - start >= range->minlen) {
8909 if (!block_group_cache_done(cache)) {
8910 ret = cache_block_group(cache, NULL, root, 0);
8911 if (!ret)
8912 wait_block_group_cache_done(cache);
8913 }
8914 ret = btrfs_trim_block_group(cache,
8915 &group_trimmed,
8916 start,
8917 end,
8918 range->minlen);
8919
8920 trimmed += group_trimmed;
8921 if (ret) {
8922 btrfs_put_block_group(cache);
8923 break;
8924 }
8925 }
8926
8927 cache = next_block_group(fs_info->tree_root, cache);
8928 }
8929
8930 range->len = trimmed;
8931 return ret;
8736} 8932}
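
btrfs_trim_fs() walks every block group overlapping [range->start, range->start + range->len), trims the free space of each, and sums the per-group totals back into range->len. It is the btrfs backend for the generic FITRIM ioctl, which userspace drives roughly like this (a sketch, with error handling kept minimal):

    #include <fcntl.h>
    #include <stdio.h>
    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>           /* FITRIM, struct fstrim_range */

    int main(void)
    {
            struct fstrim_range range = {
                    .start  = 0,
                    .len    = UINT64_MAX,   /* whole filesystem */
                    .minlen = 0,            /* no minimum extent size */
            };
            int fd = open("/mnt/btrfs", O_RDONLY);

            if (fd < 0 || ioctl(fd, FITRIM, &range) < 0) {
                    perror("fstrim");
                    return 1;
            }
            /* the kernel writes back how many bytes were actually trimmed */
            printf("trimmed %llu bytes\n", (unsigned long long)range.len);
            return 0;
    }
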
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 714adc4ac4c2..315138605088 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -690,6 +690,15 @@ static void cache_state(struct extent_state *state,
690 } 690 }
691} 691}
692 692
693static void uncache_state(struct extent_state **cached_ptr)
694{
695 if (cached_ptr && (*cached_ptr)) {
696 struct extent_state *state = *cached_ptr;
697 *cached_ptr = NULL;
698 free_extent_state(state);
699 }
700}
701
693/* 702/*
694 * set some bits on a range in the tree. This may require allocations or 703 * set some bits on a range in the tree. This may require allocations or
695 * sleeping, so the gfp mask is used to indicate what is allowed. 704 * sleeping, so the gfp mask is used to indicate what is allowed.
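
uncache_state() is the disposal half of the cached-state protocol used throughout this file: cache_state() stashes a referenced extent_state in the caller's pointer so later tree operations can skip the rbtree lookup, and whoever ends up holding that pointer must either hand it to unlock_extent_cached(), which consumes the reference, or drop it explicitly. In outline (sketch, mirroring the read endio path below):

    struct extent_state *cached = NULL;

    /* under tree->lock: remember the state covering [start, end] */
    cache_state(state, &cached);    /* takes a reference */

    if (error_path)
            uncache_state(&cached); /* nobody will unlock; drop our ref */
    else
            unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
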
@@ -940,10 +949,10 @@ static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
940} 949}
941 950
942int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, 951int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
943 gfp_t mask) 952 struct extent_state **cached_state, gfp_t mask)
944{ 953{
945 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, 954 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0,
946 NULL, mask); 955 NULL, cached_state, mask);
947} 956}
948 957
949static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, 958static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
@@ -1012,8 +1021,7 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
1012 mask); 1021 mask);
1013} 1022}
1014 1023
1015int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, 1024int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
1016 gfp_t mask)
1017{ 1025{
1018 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, 1026 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
1019 mask); 1027 mask);
@@ -1735,6 +1743,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
1735 1743
1736 do { 1744 do {
1737 struct page *page = bvec->bv_page; 1745 struct page *page = bvec->bv_page;
1746 struct extent_state *cached = NULL;
1747 struct extent_state *state;
1748
1738 tree = &BTRFS_I(page->mapping->host)->io_tree; 1749 tree = &BTRFS_I(page->mapping->host)->io_tree;
1739 1750
1740 start = ((u64)page->index << PAGE_CACHE_SHIFT) + 1751 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
@@ -1749,9 +1760,20 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
1749 if (++bvec <= bvec_end) 1760 if (++bvec <= bvec_end)
1750 prefetchw(&bvec->bv_page->flags); 1761 prefetchw(&bvec->bv_page->flags);
1751 1762
1763 spin_lock(&tree->lock);
1764 state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED);
1765 if (state && state->start == start) {
1766 /*
1767 * take a reference on the state, unlock will drop
1768 * the ref
1769 */
1770 cache_state(state, &cached);
1771 }
1772 spin_unlock(&tree->lock);
1773
1752 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { 1774 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
1753 ret = tree->ops->readpage_end_io_hook(page, start, end, 1775 ret = tree->ops->readpage_end_io_hook(page, start, end,
1754 NULL); 1776 state);
1755 if (ret) 1777 if (ret)
1756 uptodate = 0; 1778 uptodate = 0;
1757 } 1779 }
@@ -1764,15 +1786,16 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
1764 test_bit(BIO_UPTODATE, &bio->bi_flags); 1786 test_bit(BIO_UPTODATE, &bio->bi_flags);
1765 if (err) 1787 if (err)
1766 uptodate = 0; 1788 uptodate = 0;
1789 uncache_state(&cached);
1767 continue; 1790 continue;
1768 } 1791 }
1769 } 1792 }
1770 1793
1771 if (uptodate) { 1794 if (uptodate) {
1772 set_extent_uptodate(tree, start, end, 1795 set_extent_uptodate(tree, start, end, &cached,
1773 GFP_ATOMIC); 1796 GFP_ATOMIC);
1774 } 1797 }
1775 unlock_extent(tree, start, end, GFP_ATOMIC); 1798 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
1776 1799
1777 if (whole_page) { 1800 if (whole_page) {
1778 if (uptodate) { 1801 if (uptodate) {
@@ -1811,6 +1834,7 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)
1811 1834
1812 do { 1835 do {
1813 struct page *page = bvec->bv_page; 1836 struct page *page = bvec->bv_page;
1837 struct extent_state *cached = NULL;
1814 tree = &BTRFS_I(page->mapping->host)->io_tree; 1838 tree = &BTRFS_I(page->mapping->host)->io_tree;
1815 1839
1816 start = ((u64)page->index << PAGE_CACHE_SHIFT) + 1840 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
@@ -1821,13 +1845,14 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)
1821 prefetchw(&bvec->bv_page->flags); 1845 prefetchw(&bvec->bv_page->flags);
1822 1846
1823 if (uptodate) { 1847 if (uptodate) {
1824 set_extent_uptodate(tree, start, end, GFP_ATOMIC); 1848 set_extent_uptodate(tree, start, end, &cached,
1849 GFP_ATOMIC);
1825 } else { 1850 } else {
1826 ClearPageUptodate(page); 1851 ClearPageUptodate(page);
1827 SetPageError(page); 1852 SetPageError(page);
1828 } 1853 }
1829 1854
1830 unlock_extent(tree, start, end, GFP_ATOMIC); 1855 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
1831 1856
1832 } while (bvec >= bio->bi_io_vec); 1857 } while (bvec >= bio->bi_io_vec);
1833 1858
@@ -2016,14 +2041,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2016 while (cur <= end) { 2041 while (cur <= end) {
2017 if (cur >= last_byte) { 2042 if (cur >= last_byte) {
2018 char *userpage; 2043 char *userpage;
2044 struct extent_state *cached = NULL;
2045
2019 iosize = PAGE_CACHE_SIZE - page_offset; 2046 iosize = PAGE_CACHE_SIZE - page_offset;
2020 userpage = kmap_atomic(page, KM_USER0); 2047 userpage = kmap_atomic(page, KM_USER0);
2021 memset(userpage + page_offset, 0, iosize); 2048 memset(userpage + page_offset, 0, iosize);
2022 flush_dcache_page(page); 2049 flush_dcache_page(page);
2023 kunmap_atomic(userpage, KM_USER0); 2050 kunmap_atomic(userpage, KM_USER0);
2024 set_extent_uptodate(tree, cur, cur + iosize - 1, 2051 set_extent_uptodate(tree, cur, cur + iosize - 1,
2025 GFP_NOFS); 2052 &cached, GFP_NOFS);
2026 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 2053 unlock_extent_cached(tree, cur, cur + iosize - 1,
2054 &cached, GFP_NOFS);
2027 break; 2055 break;
2028 } 2056 }
2029 em = get_extent(inode, page, page_offset, cur, 2057 em = get_extent(inode, page, page_offset, cur,
@@ -2063,14 +2091,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2063 /* we've found a hole, just zero and go on */ 2091 /* we've found a hole, just zero and go on */
2064 if (block_start == EXTENT_MAP_HOLE) { 2092 if (block_start == EXTENT_MAP_HOLE) {
2065 char *userpage; 2093 char *userpage;
2094 struct extent_state *cached = NULL;
2095
2066 userpage = kmap_atomic(page, KM_USER0); 2096 userpage = kmap_atomic(page, KM_USER0);
2067 memset(userpage + page_offset, 0, iosize); 2097 memset(userpage + page_offset, 0, iosize);
2068 flush_dcache_page(page); 2098 flush_dcache_page(page);
2069 kunmap_atomic(userpage, KM_USER0); 2099 kunmap_atomic(userpage, KM_USER0);
2070 2100
2071 set_extent_uptodate(tree, cur, cur + iosize - 1, 2101 set_extent_uptodate(tree, cur, cur + iosize - 1,
2072 GFP_NOFS); 2102 &cached, GFP_NOFS);
2073 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 2103 unlock_extent_cached(tree, cur, cur + iosize - 1,
2104 &cached, GFP_NOFS);
2074 cur = cur + iosize; 2105 cur = cur + iosize;
2075 page_offset += iosize; 2106 page_offset += iosize;
2076 continue; 2107 continue;
@@ -2188,10 +2219,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2188 unsigned long nr_written = 0; 2219 unsigned long nr_written = 0;
2189 2220
2190 if (wbc->sync_mode == WB_SYNC_ALL) 2221 if (wbc->sync_mode == WB_SYNC_ALL)
2191 write_flags = WRITE_SYNC_PLUG; 2222 write_flags = WRITE_SYNC;
2192 else 2223 else
2193 write_flags = WRITE; 2224 write_flags = WRITE;
2194 2225
2226 trace___extent_writepage(page, inode, wbc);
2227
2195 WARN_ON(!PageLocked(page)); 2228 WARN_ON(!PageLocked(page));
2196 pg_offset = i_size & (PAGE_CACHE_SIZE - 1); 2229 pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
2197 if (page->index > end_index || 2230 if (page->index > end_index ||
@@ -2787,9 +2820,12 @@ int extent_prepare_write(struct extent_io_tree *tree,
2787 iocount++; 2820 iocount++;
2788 block_start = block_start + iosize; 2821 block_start = block_start + iosize;
2789 } else { 2822 } else {
2790 set_extent_uptodate(tree, block_start, cur_end, 2823 struct extent_state *cached = NULL;
2824
2825 set_extent_uptodate(tree, block_start, cur_end, &cached,
2791 GFP_NOFS); 2826 GFP_NOFS);
2792 unlock_extent(tree, block_start, cur_end, GFP_NOFS); 2827 unlock_extent_cached(tree, block_start, cur_end,
2828 &cached, GFP_NOFS);
2793 block_start = cur_end + 1; 2829 block_start = cur_end + 1;
2794 } 2830 }
2795 page_offset = block_start & (PAGE_CACHE_SIZE - 1); 2831 page_offset = block_start & (PAGE_CACHE_SIZE - 1);
@@ -3455,7 +3491,7 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree,
3455 num_pages = num_extent_pages(eb->start, eb->len); 3491 num_pages = num_extent_pages(eb->start, eb->len);
3456 3492
3457 set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, 3493 set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
3458 GFP_NOFS); 3494 NULL, GFP_NOFS);
3459 for (i = 0; i < num_pages; i++) { 3495 for (i = 0; i < num_pages; i++) {
3460 page = extent_buffer_page(eb, i); 3496 page = extent_buffer_page(eb, i);
3461 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || 3497 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
@@ -3690,6 +3726,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
3690 "wanted %lu %lu\n", (unsigned long long)eb->start, 3726 "wanted %lu %lu\n", (unsigned long long)eb->start,
3691 eb->len, start, min_len); 3727 eb->len, start, min_len);
3692 WARN_ON(1); 3728 WARN_ON(1);
3729 return -EINVAL;
3693 } 3730 }
3694 3731
3695 p = extent_buffer_page(eb, i); 3732 p = extent_buffer_page(eb, i);
@@ -3882,6 +3919,12 @@ static void move_pages(struct page *dst_page, struct page *src_page,
3882 kunmap_atomic(dst_kaddr, KM_USER0); 3919 kunmap_atomic(dst_kaddr, KM_USER0);
3883} 3920}
3884 3921
3922static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
3923{
3924 unsigned long distance = (src > dst) ? src - dst : dst - src;
3925 return distance < len;
3926}
3927
3885static void copy_pages(struct page *dst_page, struct page *src_page, 3928static void copy_pages(struct page *dst_page, struct page *src_page,
3886 unsigned long dst_off, unsigned long src_off, 3929 unsigned long dst_off, unsigned long src_off,
3887 unsigned long len) 3930 unsigned long len)
@@ -3889,10 +3932,12 @@ static void copy_pages(struct page *dst_page, struct page *src_page,
3889 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); 3932 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
3890 char *src_kaddr; 3933 char *src_kaddr;
3891 3934
3892 if (dst_page != src_page) 3935 if (dst_page != src_page) {
3893 src_kaddr = kmap_atomic(src_page, KM_USER1); 3936 src_kaddr = kmap_atomic(src_page, KM_USER1);
3894 else 3937 } else {
3895 src_kaddr = dst_kaddr; 3938 src_kaddr = dst_kaddr;
3939 BUG_ON(areas_overlap(src_off, dst_off, len));
3940 }
3896 3941
3897 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); 3942 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
3898 kunmap_atomic(dst_kaddr, KM_USER0); 3943 kunmap_atomic(dst_kaddr, KM_USER0);
@@ -3967,7 +4012,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
3967 "len %lu len %lu\n", dst_offset, len, dst->len); 4012 "len %lu len %lu\n", dst_offset, len, dst->len);
3968 BUG_ON(1); 4013 BUG_ON(1);
3969 } 4014 }
3970 if (dst_offset < src_offset) { 4015 if (!areas_overlap(src_offset, dst_offset, len)) {
3971 memcpy_extent_buffer(dst, dst_offset, src_offset, len); 4016 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
3972 return; 4017 return;
3973 } 4018 }
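
The old guard, dst_offset < src_offset, forced the slow backwards copy for every forward move even when the ranges could not possibly overlap. areas_overlap() tests what actually matters: whether the distance between the two offsets is smaller than the length being copied. Worked through on small numbers:

    /* src = 0,  dst = 100, len = 64: distance = 100 >= 64 -> no overlap,
     *   plain memcpy_extent_buffer() is safe even though dst > src.
     * src = 0,  dst = 32,  len = 64: distance = 32 < 64   -> overlap,
     *   bytes 32..63 would be read after being overwritten, so the
     *   backwards move path is required.
     */
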
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 9318dfefd59c..af2d7179c372 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -31,6 +31,7 @@
31#define EXTENT_BUFFER_UPTODATE 0 31#define EXTENT_BUFFER_UPTODATE 0
32#define EXTENT_BUFFER_BLOCKING 1 32#define EXTENT_BUFFER_BLOCKING 1
33#define EXTENT_BUFFER_DIRTY 2 33#define EXTENT_BUFFER_DIRTY 2
34#define EXTENT_BUFFER_CORRUPT 3
34 35
35/* these are flags for extent_clear_unlock_delalloc */ 36/* these are flags for extent_clear_unlock_delalloc */
36#define EXTENT_CLEAR_UNLOCK_PAGE 0x1 37#define EXTENT_CLEAR_UNLOCK_PAGE 0x1
@@ -207,7 +208,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
207 int bits, int exclusive_bits, u64 *failed_start, 208 int bits, int exclusive_bits, u64 *failed_start,
208 struct extent_state **cached_state, gfp_t mask); 209 struct extent_state **cached_state, gfp_t mask);
209int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, 210int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
210 gfp_t mask); 211 struct extent_state **cached_state, gfp_t mask);
211int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, 212int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
212 gfp_t mask); 213 gfp_t mask);
213int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, 214int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 2b6c12e983b3..a24a3f2fa13e 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -243,7 +243,7 @@ out:
243 * Insert @em into @tree or perform a simple forward/backward merge with 243 * Insert @em into @tree or perform a simple forward/backward merge with
244 * existing mappings. The extent_map struct passed in will be inserted 244 * existing mappings. The extent_map struct passed in will be inserted
245 * into the tree directly, with an additional reference taken, or a 245 * into the tree directly, with an additional reference taken, or a
246 * reference dropped if the merge attempt was successfull. 246 * reference dropped if the merge attempt was successful.
247 */ 247 */
248int add_extent_mapping(struct extent_map_tree *tree, 248int add_extent_mapping(struct extent_map_tree *tree,
249 struct extent_map *em) 249 struct extent_map *em)
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 4f19a3e1bf32..a6a9d4e8b491 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -48,7 +48,8 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
48 struct extent_buffer *leaf; 48 struct extent_buffer *leaf;
49 49
50 path = btrfs_alloc_path(); 50 path = btrfs_alloc_path();
51 BUG_ON(!path); 51 if (!path)
52 return -ENOMEM;
52 file_key.objectid = objectid; 53 file_key.objectid = objectid;
53 file_key.offset = pos; 54 file_key.offset = pos;
54 btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); 55 btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY);
@@ -169,6 +170,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
169 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 170 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
170 171
171 path = btrfs_alloc_path(); 172 path = btrfs_alloc_path();
173 if (!path)
174 return -ENOMEM;
172 if (bio->bi_size > PAGE_CACHE_SIZE * 8) 175 if (bio->bi_size > PAGE_CACHE_SIZE * 8)
173 path->reada = 2; 176 path->reada = 2;
174 177
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index f447b783bb84..75899a01dded 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -45,14 +45,14 @@
45 * and be replaced with calls into generic code. 45 * and be replaced with calls into generic code.
46 */ 46 */
47static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, 47static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
48 int write_bytes, 48 size_t write_bytes,
49 struct page **prepared_pages, 49 struct page **prepared_pages,
50 struct iov_iter *i) 50 struct iov_iter *i)
51{ 51{
52 size_t copied = 0; 52 size_t copied = 0;
53 size_t total_copied = 0;
53 int pg = 0; 54 int pg = 0;
54 int offset = pos & (PAGE_CACHE_SIZE - 1); 55 int offset = pos & (PAGE_CACHE_SIZE - 1);
55 int total_copied = 0;
56 56
57 while (write_bytes > 0) { 57 while (write_bytes > 0) {
58 size_t count = min_t(size_t, 58 size_t count = min_t(size_t,
@@ -88,9 +88,8 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
88 total_copied += copied; 88 total_copied += copied;
89 89
90 /* Return to btrfs_file_aio_write to fault page */ 90 /* Return to btrfs_file_aio_write to fault page */
91 if (unlikely(copied == 0)) { 91 if (unlikely(copied == 0))
92 break; 92 break;
93 }
94 93
95 if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { 94 if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
96 offset += copied; 95 offset += copied;
@@ -105,12 +104,10 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
105/* 104/*
106 * unlocks pages after btrfs_file_write is done with them 105 * unlocks pages after btrfs_file_write is done with them
107 */ 106 */
108static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages) 107void btrfs_drop_pages(struct page **pages, size_t num_pages)
109{ 108{
110 size_t i; 109 size_t i;
111 for (i = 0; i < num_pages; i++) { 110 for (i = 0; i < num_pages; i++) {
112 if (!pages[i])
113 break;
114 /* page checked is some magic around finding pages that 111 /* page checked is some magic around finding pages that
115 * have been modified without going through btrfs_set_page_dirty 112 * have been modified without going through btrfs_set_page_dirty
116 * clear it here 113 * clear it here
@@ -130,17 +127,13 @@ static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
130 * this also makes the decision about creating an inline extent vs 127 * this also makes the decision about creating an inline extent vs
131 * doing real data extents, marking pages dirty and delalloc as required. 128 * doing real data extents, marking pages dirty and delalloc as required.
132 */ 129 */
133static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, 130int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
134 struct btrfs_root *root, 131 struct page **pages, size_t num_pages,
135 struct file *file, 132 loff_t pos, size_t write_bytes,
136 struct page **pages, 133 struct extent_state **cached)
137 size_t num_pages,
138 loff_t pos,
139 size_t write_bytes)
140{ 134{
141 int err = 0; 135 int err = 0;
142 int i; 136 int i;
143 struct inode *inode = fdentry(file)->d_inode;
144 u64 num_bytes; 137 u64 num_bytes;
145 u64 start_pos; 138 u64 start_pos;
146 u64 end_of_last_block; 139 u64 end_of_last_block;
@@ -153,8 +146,9 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
153 146
154 end_of_last_block = start_pos + num_bytes - 1; 147 end_of_last_block = start_pos + num_bytes - 1;
155 err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, 148 err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
156 NULL); 149 cached);
157 BUG_ON(err); 150 if (err)
151 return err;
158 152
159 for (i = 0; i < num_pages; i++) { 153 for (i = 0; i < num_pages; i++) {
160 struct page *p = pages[i]; 154 struct page *p = pages[i];
@@ -162,13 +156,14 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
162 ClearPageChecked(p); 156 ClearPageChecked(p);
163 set_page_dirty(p); 157 set_page_dirty(p);
164 } 158 }
165 if (end_pos > isize) { 159
160 /*
161 * we've only changed i_size in ram, and we haven't updated
162 * the disk i_size. There is no need to log the inode
163 * at this time.
164 */
165 if (end_pos > isize)
166 i_size_write(inode, end_pos); 166 i_size_write(inode, end_pos);
167 /* we've only changed i_size in ram, and we haven't updated
168 * the disk i_size. There is no need to log the inode
169 * at this time.
170 */
171 }
172 return 0; 167 return 0;
173} 168}
174 169
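
dirty_and_release_pages() becomes btrfs_dirty_pages(): it loses its static linkage and its unused trans/file arguments, and gains a cached extent state, so callers outside file.c (the free space cache writer changed later in this patch is the obvious customer) can reuse it. A caller pairs it with btrfs_drop_pages(), roughly (sketch):

    ret = btrfs_dirty_pages(root, inode, pages, num_pages,
                            pos, write_bytes, NULL /* no cached state */);
    btrfs_drop_pages(pages, num_pages);     /* unlock + release either way */
    if (ret)
            return ret;
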
@@ -610,6 +605,8 @@ again:
610 key.offset = split; 605 key.offset = split;
611 606
612 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 607 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
608 if (ret < 0)
609 goto out;
613 if (ret > 0 && path->slots[0] > 0) 610 if (ret > 0 && path->slots[0] > 0)
614 path->slots[0]--; 611 path->slots[0]--;
615 612
@@ -819,12 +816,11 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
819 last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; 816 last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
820 817
821 if (start_pos > inode->i_size) { 818 if (start_pos > inode->i_size) {
822 err = btrfs_cont_expand(inode, start_pos); 819 err = btrfs_cont_expand(inode, i_size_read(inode), start_pos);
823 if (err) 820 if (err)
824 return err; 821 return err;
825 } 822 }
826 823
827 memset(pages, 0, num_pages * sizeof(struct page *));
828again: 824again:
829 for (i = 0; i < num_pages; i++) { 825 for (i = 0; i < num_pages; i++) {
830 pages[i] = grab_cache_page(inode->i_mapping, index + i); 826 pages[i] = grab_cache_page(inode->i_mapping, index + i);
@@ -896,156 +892,71 @@ fail:
896 892
897} 893}
898 894
899static ssize_t btrfs_file_aio_write(struct kiocb *iocb, 895static noinline ssize_t __btrfs_buffered_write(struct file *file,
900 const struct iovec *iov, 896 struct iov_iter *i,
901 unsigned long nr_segs, loff_t pos) 897 loff_t pos)
902{ 898{
903 struct file *file = iocb->ki_filp;
904 struct inode *inode = fdentry(file)->d_inode; 899 struct inode *inode = fdentry(file)->d_inode;
905 struct btrfs_root *root = BTRFS_I(inode)->root; 900 struct btrfs_root *root = BTRFS_I(inode)->root;
906 struct page **pages = NULL; 901 struct page **pages = NULL;
907 struct iov_iter i;
908 loff_t *ppos = &iocb->ki_pos;
909 loff_t start_pos;
910 ssize_t num_written = 0;
911 ssize_t err = 0;
912 size_t count;
913 size_t ocount;
914 int ret = 0;
915 int nrptrs;
916 unsigned long first_index; 902 unsigned long first_index;
917 unsigned long last_index; 903 unsigned long last_index;
918 int will_write; 904 size_t num_written = 0;
919 int buffered = 0; 905 int nrptrs;
920 int copied = 0; 906 int ret = 0;
921 int dirty_pages = 0;
922
923 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
924 (file->f_flags & O_DIRECT));
925
926 start_pos = pos;
927
928 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
929
930 mutex_lock(&inode->i_mutex);
931
932 err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
933 if (err)
934 goto out;
935 count = ocount;
936
937 current->backing_dev_info = inode->i_mapping->backing_dev_info;
938 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
939 if (err)
940 goto out;
941
942 if (count == 0)
943 goto out;
944
945 err = file_remove_suid(file);
946 if (err)
947 goto out;
948
949 /*
950 * If BTRFS flips readonly due to some impossible error
951 * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR),
952 * although we have opened a file as writable, we have
953 * to stop this write operation to ensure FS consistency.
954 */
955 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
956 err = -EROFS;
957 goto out;
958 }
959
960 file_update_time(file);
961 BTRFS_I(inode)->sequence++;
962
963 if (unlikely(file->f_flags & O_DIRECT)) {
964 num_written = generic_file_direct_write(iocb, iov, &nr_segs,
965 pos, ppos, count,
966 ocount);
967 /*
968 * the generic O_DIRECT will update in-memory i_size after the
969 * DIOs are done. But our endio handlers that update the on
970 * disk i_size never update past the in memory i_size. So we
971 * need one more update here to catch any additions to the
972 * file
973 */
974 if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
975 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
976 mark_inode_dirty(inode);
977 }
978
979 if (num_written < 0) {
980 ret = num_written;
981 num_written = 0;
982 goto out;
983 } else if (num_written == count) {
984 /* pick up pos changes done by the generic code */
985 pos = *ppos;
986 goto out;
987 }
988 /*
989 * We are going to do buffered for the rest of the range, so we
990 * need to make sure to invalidate the buffered pages when we're
991 * done.
992 */
993 buffered = 1;
994 pos += num_written;
995 }
996 907
997 iov_iter_init(&i, iov, nr_segs, count, num_written); 908 nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
998 nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) /
999 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / 909 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
1000 (sizeof(struct page *))); 910 (sizeof(struct page *)));
1001 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); 911 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
1002 if (!pages) { 912 if (!pages)
1003 ret = -ENOMEM; 913 return -ENOMEM;
1004 goto out;
1005 }
1006
1007 /* generic_write_checks can change our pos */
1008 start_pos = pos;
1009 914
1010 first_index = pos >> PAGE_CACHE_SHIFT; 915 first_index = pos >> PAGE_CACHE_SHIFT;
1011 last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT; 916 last_index = (pos + iov_iter_count(i)) >> PAGE_CACHE_SHIFT;
1012 917
1013 while (iov_iter_count(&i) > 0) { 918 while (iov_iter_count(i) > 0) {
1014 size_t offset = pos & (PAGE_CACHE_SIZE - 1); 919 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
1015 size_t write_bytes = min(iov_iter_count(&i), 920 size_t write_bytes = min(iov_iter_count(i),
1016 nrptrs * (size_t)PAGE_CACHE_SIZE - 921 nrptrs * (size_t)PAGE_CACHE_SIZE -
1017 offset); 922 offset);
1018 size_t num_pages = (write_bytes + offset + 923 size_t num_pages = (write_bytes + offset +
1019 PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 924 PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
925 size_t dirty_pages;
926 size_t copied;
1020 927
1021 WARN_ON(num_pages > nrptrs); 928 WARN_ON(num_pages > nrptrs);
1022 memset(pages, 0, sizeof(struct page *) * nrptrs);
1023 929
1024 /* 930 /*
1025 * Fault pages before locking them in prepare_pages 931 * Fault pages before locking them in prepare_pages
1026 * to avoid recursive lock 932 * to avoid recursive lock
1027 */ 933 */
1028 if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) { 934 if (unlikely(iov_iter_fault_in_readable(i, write_bytes))) {
1029 ret = -EFAULT; 935 ret = -EFAULT;
1030 goto out; 936 break;
1031 } 937 }
1032 938
1033 ret = btrfs_delalloc_reserve_space(inode, 939 ret = btrfs_delalloc_reserve_space(inode,
1034 num_pages << PAGE_CACHE_SHIFT); 940 num_pages << PAGE_CACHE_SHIFT);
1035 if (ret) 941 if (ret)
1036 goto out; 942 break;
1037 943
944 /*
945 * This is going to setup the pages array with the number of
946 * pages we want, so we don't really need to worry about the
947 * contents of pages from loop to loop
948 */
1038 ret = prepare_pages(root, file, pages, num_pages, 949 ret = prepare_pages(root, file, pages, num_pages,
1039 pos, first_index, last_index, 950 pos, first_index, last_index,
1040 write_bytes); 951 write_bytes);
1041 if (ret) { 952 if (ret) {
1042 btrfs_delalloc_release_space(inode, 953 btrfs_delalloc_release_space(inode,
1043 num_pages << PAGE_CACHE_SHIFT); 954 num_pages << PAGE_CACHE_SHIFT);
1044 goto out; 955 break;
1045 } 956 }
1046 957
1047 copied = btrfs_copy_from_user(pos, num_pages, 958 copied = btrfs_copy_from_user(pos, num_pages,
1048 write_bytes, pages, &i); 959 write_bytes, pages, i);
1049 960
1050 /* 961 /*
1051 * if we have trouble faulting in the pages, fall 962 * if we have trouble faulting in the pages, fall
@@ -1061,6 +972,13 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1061 PAGE_CACHE_SIZE - 1) >> 972 PAGE_CACHE_SIZE - 1) >>
1062 PAGE_CACHE_SHIFT; 973 PAGE_CACHE_SHIFT;
1063 974
975 /*
976 * If we had a short copy we need to release the excess delaloc
977 * bytes we reserved. We need to increment outstanding_extents
978 * because btrfs_delalloc_release_space will decrement it, but
979 * we still have an outstanding extent for the chunk we actually
980 * managed to copy.
981 */
1064 if (num_pages > dirty_pages) { 982 if (num_pages > dirty_pages) {
1065 if (copied > 0) 983 if (copied > 0)
1066 atomic_inc( 984 atomic_inc(
@@ -1071,39 +989,157 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1071 } 989 }
1072 990
1073 if (copied > 0) { 991 if (copied > 0) {
1074 dirty_and_release_pages(NULL, root, file, pages, 992 ret = btrfs_dirty_pages(root, inode, pages,
1075 dirty_pages, pos, copied); 993 dirty_pages, pos, copied,
994 NULL);
995 if (ret) {
996 btrfs_delalloc_release_space(inode,
997 dirty_pages << PAGE_CACHE_SHIFT);
998 btrfs_drop_pages(pages, num_pages);
999 break;
1000 }
1076 } 1001 }
1077 1002
1078 btrfs_drop_pages(pages, num_pages); 1003 btrfs_drop_pages(pages, num_pages);
1079 1004
1080 if (copied > 0) { 1005 cond_resched();
1081 if (will_write) { 1006
1082 filemap_fdatawrite_range(inode->i_mapping, pos, 1007 balance_dirty_pages_ratelimited_nr(inode->i_mapping,
1083 pos + copied - 1); 1008 dirty_pages);
1084 } else { 1009 if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
1085 balance_dirty_pages_ratelimited_nr( 1010 btrfs_btree_balance_dirty(root, 1);
1086 inode->i_mapping, 1011 btrfs_throttle(root);
1087 dirty_pages);
1088 if (dirty_pages <
1089 (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
1090 btrfs_btree_balance_dirty(root, 1);
1091 btrfs_throttle(root);
1092 }
1093 }
1094 1012
1095 pos += copied; 1013 pos += copied;
1096 num_written += copied; 1014 num_written += copied;
1015 }
1097 1016
1098 cond_resched(); 1017 kfree(pages);
1018
1019 return num_written ? num_written : ret;
1020}
1021
1022static ssize_t __btrfs_direct_write(struct kiocb *iocb,
1023 const struct iovec *iov,
1024 unsigned long nr_segs, loff_t pos,
1025 loff_t *ppos, size_t count, size_t ocount)
1026{
1027 struct file *file = iocb->ki_filp;
1028 struct inode *inode = fdentry(file)->d_inode;
1029 struct iov_iter i;
1030 ssize_t written;
1031 ssize_t written_buffered;
1032 loff_t endbyte;
1033 int err;
1034
1035 written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos,
1036 count, ocount);
1037
1038 /*
1039 * the generic O_DIRECT will update in-memory i_size after the
1040 * DIOs are done. But our endio handlers that update the on
1041 * disk i_size never update past the in memory i_size. So we
1042 * need one more update here to catch any additions to the
1043 * file
1044 */
1045 if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
1046 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
1047 mark_inode_dirty(inode);
1099 } 1048 }
1049
1050 if (written < 0 || written == count)
1051 return written;
1052
1053 pos += written;
1054 count -= written;
1055 iov_iter_init(&i, iov, nr_segs, count, written);
1056 written_buffered = __btrfs_buffered_write(file, &i, pos);
1057 if (written_buffered < 0) {
1058 err = written_buffered;
1059 goto out;
1060 }
1061 endbyte = pos + written_buffered - 1;
1062 err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte);
1063 if (err)
1064 goto out;
1065 written += written_buffered;
1066 *ppos = pos + written_buffered;
1067 invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT,
1068 endbyte >> PAGE_CACHE_SHIFT);
1100out: 1069out:
1101 mutex_unlock(&inode->i_mutex); 1070 return written ? written : err;
1102 if (ret) 1071}
1103 err = ret;
1104 1072
1105 kfree(pages); 1073static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1106 *ppos = pos; 1074 const struct iovec *iov,
1075 unsigned long nr_segs, loff_t pos)
1076{
1077 struct file *file = iocb->ki_filp;
1078 struct inode *inode = fdentry(file)->d_inode;
1079 struct btrfs_root *root = BTRFS_I(inode)->root;
1080 loff_t *ppos = &iocb->ki_pos;
1081 ssize_t num_written = 0;
1082 ssize_t err = 0;
1083 size_t count, ocount;
1084
1085 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
1086
1087 mutex_lock(&inode->i_mutex);
1088
1089 err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
1090 if (err) {
1091 mutex_unlock(&inode->i_mutex);
1092 goto out;
1093 }
1094 count = ocount;
1095
1096 current->backing_dev_info = inode->i_mapping->backing_dev_info;
1097 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
1098 if (err) {
1099 mutex_unlock(&inode->i_mutex);
1100 goto out;
1101 }
1102
1103 if (count == 0) {
1104 mutex_unlock(&inode->i_mutex);
1105 goto out;
1106 }
1107
1108 err = file_remove_suid(file);
1109 if (err) {
1110 mutex_unlock(&inode->i_mutex);
1111 goto out;
1112 }
1113
1114 /*
1115 * If BTRFS flips readonly due to some impossible error
1116 * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR),
1117 * although we have opened a file as writable, we have
1118 * to stop this write operation to ensure FS consistency.
1119 */
1120 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
1121 mutex_unlock(&inode->i_mutex);
1122 err = -EROFS;
1123 goto out;
1124 }
1125
1126 file_update_time(file);
1127 BTRFS_I(inode)->sequence++;
1128
1129 if (unlikely(file->f_flags & O_DIRECT)) {
1130 num_written = __btrfs_direct_write(iocb, iov, nr_segs,
1131 pos, ppos, count, ocount);
1132 } else {
1133 struct iov_iter i;
1134
1135 iov_iter_init(&i, iov, nr_segs, count, num_written);
1136
1137 num_written = __btrfs_buffered_write(file, &i, pos);
1138 if (num_written > 0)
1139 *ppos = pos + num_written;
1140 }
1141
1142 mutex_unlock(&inode->i_mutex);
1107 1143
1108 /* 1144 /*
1109 * we want to make sure fsync finds this change 1145 * we want to make sure fsync finds this change
@@ -1118,43 +1154,12 @@ out:
1118 * one running right now. 1154 * one running right now.
1119 */ 1155 */
1120 BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; 1156 BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
1121 1157 if (num_written > 0 || num_written == -EIOCBQUEUED) {
1122 if (num_written > 0 && will_write) { 1158 err = generic_write_sync(file, pos, num_written);
1123 struct btrfs_trans_handle *trans; 1159 if (err < 0 && num_written > 0)
1124
1125 err = btrfs_wait_ordered_range(inode, start_pos, num_written);
1126 if (err)
1127 num_written = err; 1160 num_written = err;
1128
1129 if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
1130 trans = btrfs_start_transaction(root, 0);
1131 if (IS_ERR(trans)) {
1132 num_written = PTR_ERR(trans);
1133 goto done;
1134 }
1135 mutex_lock(&inode->i_mutex);
1136 ret = btrfs_log_dentry_safe(trans, root,
1137 file->f_dentry);
1138 mutex_unlock(&inode->i_mutex);
1139 if (ret == 0) {
1140 ret = btrfs_sync_log(trans, root);
1141 if (ret == 0)
1142 btrfs_end_transaction(trans, root);
1143 else
1144 btrfs_commit_transaction(trans, root);
1145 } else if (ret != BTRFS_NO_LOG_SYNC) {
1146 btrfs_commit_transaction(trans, root);
1147 } else {
1148 btrfs_end_transaction(trans, root);
1149 }
1150 }
1151 if (file->f_flags & O_DIRECT && buffered) {
1152 invalidate_mapping_pages(inode->i_mapping,
1153 start_pos >> PAGE_CACHE_SHIFT,
1154 (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
1155 }
1156 } 1161 }
1157done: 1162out:
1158 current->backing_dev_info = NULL; 1163 current->backing_dev_info = NULL;
1159 return num_written ? num_written : err; 1164 return num_written ? num_written : err;
1160} 1165}
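
After this rewrite btrfs_file_aio_write() is only the front door: sanity and permission checks plus the O_DIRECT/buffered dispatch, with generic_write_sync() replacing the hand-rolled transaction and tree-log commit for O_DSYNC and O_SYNC writers. The resulting control flow, compressed into an outline (not code):

    btrfs_file_aio_write()
        checks: frozen fs, segment/write checks, suid, fs flipped read-only
        O_DIRECT -> __btrfs_direct_write()
                        generic_file_direct_write()
                        short write? __btrfs_buffered_write() finishes the
                        tail, then filemap_write_and_wait_range() plus
                        invalidate_mapping_pages() keep the page cache and
                        the direct I/O coherent
        else     -> __btrfs_buffered_write()
                        loop: reserve delalloc space, prepare_pages(),
                        btrfs_copy_from_user(), btrfs_dirty_pages(),
                        btrfs_drop_pages(), balance dirty pages
        generic_write_sync() when the write carries sync semantics
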
@@ -1197,6 +1202,7 @@ int btrfs_sync_file(struct file *file, int datasync)
1197 int ret = 0; 1202 int ret = 0;
1198 struct btrfs_trans_handle *trans; 1203 struct btrfs_trans_handle *trans;
1199 1204
1205 trace_btrfs_sync_file(file, datasync);
1200 1206
1201 /* we wait first, since the writeback may change the inode */ 1207 /* we wait first, since the writeback may change the inode */
1202 root->log_batch++; 1208 root->log_batch++;
@@ -1324,7 +1330,8 @@ static long btrfs_fallocate(struct file *file, int mode,
1324 goto out; 1330 goto out;
1325 1331
1326 if (alloc_start > inode->i_size) { 1332 if (alloc_start > inode->i_size) {
1327 ret = btrfs_cont_expand(inode, alloc_start); 1333 ret = btrfs_cont_expand(inode, i_size_read(inode),
1334 alloc_start);
1328 if (ret) 1335 if (ret)
1329 goto out; 1336 goto out;
1330 } 1337 }
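
Both btrfs_cont_expand() call sites in this file now pass the current in-memory size alongside the target, so the helper no longer has to fetch it itself; the matching prototype change (made in a header, not shown in this diff) is presumably:

    int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size);
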
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index a0390657451b..11d2e9cea09e 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -24,6 +24,7 @@
24#include "free-space-cache.h" 24#include "free-space-cache.h"
25#include "transaction.h" 25#include "transaction.h"
26#include "disk-io.h" 26#include "disk-io.h"
27#include "extent_io.h"
27 28
28#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) 29#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
29#define MAX_CACHE_BYTES_PER_GIG (32 * 1024) 30#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
@@ -81,6 +82,8 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
81 return ERR_PTR(-ENOENT); 82 return ERR_PTR(-ENOENT);
82 } 83 }
83 84
85 inode->i_mapping->flags &= ~__GFP_FS;
86
84 spin_lock(&block_group->lock); 87 spin_lock(&block_group->lock);
85 if (!root->fs_info->closing) { 88 if (!root->fs_info->closing) {
86 block_group->inode = igrab(inode); 89 block_group->inode = igrab(inode);
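
Masking __GFP_FS out of the cache inode's mapping flags means page cache allocations for the free space cache can never recurse back into the filesystem for reclaim, which could deadlock while block group locks are held. The more conventional spelling of the same operation uses the pagemap helpers (sketch):

    mapping_set_gfp_mask(inode->i_mapping,
                         mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
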
@@ -222,6 +225,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
222 u64 num_entries; 225 u64 num_entries;
223 u64 num_bitmaps; 226 u64 num_bitmaps;
224 u64 generation; 227 u64 generation;
228 u64 used = btrfs_block_group_used(&block_group->item);
225 u32 cur_crc = ~(u32)0; 229 u32 cur_crc = ~(u32)0;
226 pgoff_t index = 0; 230 pgoff_t index = 0;
227 unsigned long first_page_offset; 231 unsigned long first_page_offset;
@@ -393,7 +397,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
393 break; 397 break;
394 398
395 need_loop = 1; 399 need_loop = 1;
396 e = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); 400 e = kmem_cache_zalloc(btrfs_free_space_cachep,
401 GFP_NOFS);
397 if (!e) { 402 if (!e) {
398 kunmap(page); 403 kunmap(page);
399 unlock_page(page); 404 unlock_page(page);
@@ -405,7 +410,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
405 e->bytes = le64_to_cpu(entry->bytes); 410 e->bytes = le64_to_cpu(entry->bytes);
406 if (!e->bytes) { 411 if (!e->bytes) {
407 kunmap(page); 412 kunmap(page);
408 kfree(e); 413 kmem_cache_free(btrfs_free_space_cachep, e);
409 unlock_page(page); 414 unlock_page(page);
410 page_cache_release(page); 415 page_cache_release(page);
411 goto free_cache; 416 goto free_cache;
@@ -420,7 +425,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
420 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 425 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
421 if (!e->bitmap) { 426 if (!e->bitmap) {
422 kunmap(page); 427 kunmap(page);
423 kfree(e); 428 kmem_cache_free(
429 btrfs_free_space_cachep, e);
424 unlock_page(page); 430 unlock_page(page);
425 page_cache_release(page); 431 page_cache_release(page);
426 goto free_cache; 432 goto free_cache;
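
Free space entries move from plain kzalloc() to a dedicated slab cache, which packs the small fixed-size objects more tightly and makes them visible in /proc/slabinfo. The cache itself, btrfs_free_space_cachep, is created elsewhere in the patch, presumably at module init in the usual way (sketch; the exact slab flags are an assumption):

    struct kmem_cache *btrfs_free_space_cachep;

    btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space_cache",
                    sizeof(struct btrfs_free_space), 0,
                    SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
    if (!btrfs_free_space_cachep)
            return -ENOMEM;

    /* per-object lifecycle, as used above: */
    e = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
    kmem_cache_free(btrfs_free_space_cachep, e);
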
@@ -465,6 +471,17 @@ next:
465 index++; 471 index++;
466 } 472 }
467 473
474 spin_lock(&block_group->tree_lock);
475 if (block_group->free_space != (block_group->key.offset - used -
476 block_group->bytes_super)) {
477 spin_unlock(&block_group->tree_lock);
 478 printk(KERN_ERR "block group %llu has the wrong amount of free "
479 "space\n", block_group->key.objectid);
480 ret = 0;
481 goto free_cache;
482 }
483 spin_unlock(&block_group->tree_lock);
484
468 ret = 1; 485 ret = 1;
469out: 486out:
470 kfree(checksums); 487 kfree(checksums);
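
The new consistency check rejects a stale cache by re-deriving what the free space must be: everything in the block group that is neither allocated nor reserved for superblock copies. For example, a 1 GiB block group (key.offset = 1073741824) with 256 MiB used and 4 MiB of bytes_super must have exactly 1073741824 - 268435456 - 4194304 = 801112064 bytes of free space in the loaded cache; any other total means the cache no longer matches the on-disk allocator state, so it is discarded and ret = 0 makes the caller fall back to scanning the extent tree.
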
@@ -491,18 +508,23 @@ int btrfs_write_out_cache(struct btrfs_root *root,
491 struct inode *inode; 508 struct inode *inode;
492 struct rb_node *node; 509 struct rb_node *node;
493 struct list_head *pos, *n; 510 struct list_head *pos, *n;
511 struct page **pages;
494 struct page *page; 512 struct page *page;
495 struct extent_state *cached_state = NULL; 513 struct extent_state *cached_state = NULL;
514 struct btrfs_free_cluster *cluster = NULL;
515 struct extent_io_tree *unpin = NULL;
496 struct list_head bitmap_list; 516 struct list_head bitmap_list;
497 struct btrfs_key key; 517 struct btrfs_key key;
518 u64 start, end, len;
498 u64 bytes = 0; 519 u64 bytes = 0;
499 u32 *crc, *checksums; 520 u32 *crc, *checksums;
500 pgoff_t index = 0, last_index = 0;
501 unsigned long first_page_offset; 521 unsigned long first_page_offset;
502 int num_checksums; 522 int index = 0, num_pages = 0;
503 int entries = 0; 523 int entries = 0;
504 int bitmaps = 0; 524 int bitmaps = 0;
505 int ret = 0; 525 int ret = 0;
526 bool next_page = false;
527 bool out_of_space = false;
506 528
507 root = root->fs_info->tree_root; 529 root = root->fs_info->tree_root;
508 530
@@ -530,24 +552,43 @@ int btrfs_write_out_cache(struct btrfs_root *root,
530 return 0; 552 return 0;
531 } 553 }
532 554
533 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; 555 num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
556 PAGE_CACHE_SHIFT;
534 filemap_write_and_wait(inode->i_mapping); 557 filemap_write_and_wait(inode->i_mapping);
535 btrfs_wait_ordered_range(inode, inode->i_size & 558 btrfs_wait_ordered_range(inode, inode->i_size &
536 ~(root->sectorsize - 1), (u64)-1); 559 ~(root->sectorsize - 1), (u64)-1);
537 560
538 /* We need a checksum per page. */ 561 /* We need a checksum per page. */
539 num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE; 562 crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
540 crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
541 if (!crc) { 563 if (!crc) {
542 iput(inode); 564 iput(inode);
543 return 0; 565 return 0;
544 } 566 }
545 567
568 pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
569 if (!pages) {
570 kfree(crc);
571 iput(inode);
572 return 0;
573 }
574
546 /* Since the first page has all of our checksums and our generation we 575 /* Since the first page has all of our checksums and our generation we
547 * need to calculate the offset into the page that we can start writing 576 * need to calculate the offset into the page that we can start writing
548 * our entries. 577 * our entries.
549 */ 578 */
550 first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); 579 first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
580
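For the arithmetic above, a quick worked example (page size and file size assumed purely for illustration): a 64 KiB cache file on 4 KiB pages gives num_pages = 16, so:

	/* Illustration only: 16 pages assumed. */
	first_page_offset = sizeof(u32) * 16    /* 64 bytes of per-page CRCs */
			  + sizeof(u64);        /* 8-byte generation stamp  */
	/* == 72: free-space entries start at byte 72 of page 0 */
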
581 /* Get the cluster for this block_group if it exists */
582 if (!list_empty(&block_group->cluster_list))
583 cluster = list_entry(block_group->cluster_list.next,
584 struct btrfs_free_cluster,
585 block_group_list);
586
587 /*
588 * We shouldn't have switched the pinned extents yet so this is the
589 * right one
590 */
591 unpin = root->fs_info->pinned_extents;
551 592
552 /* 593 /*
553 * Lock all pages first so we can lock the extent safely. 594 * Lock all pages first so we can lock the extent safely.
@@ -557,20 +598,18 @@ int btrfs_write_out_cache(struct btrfs_root *root,
557 * after find_get_page at this point. Just putting this here so people 598 * after find_get_page at this point. Just putting this here so people
558 * know and don't freak out. 599 * know and don't freak out.
559 */ 600 */
560 while (index <= last_index) { 601 while (index < num_pages) {
561 page = grab_cache_page(inode->i_mapping, index); 602 page = grab_cache_page(inode->i_mapping, index);
562 if (!page) { 603 if (!page) {
563 pgoff_t i = 0; 604 int i;
564 605
565 while (i < index) { 606 for (i = 0; i < num_pages; i++) {
566 page = find_get_page(inode->i_mapping, i); 607 unlock_page(pages[i]);
567 unlock_page(page); 608 page_cache_release(pages[i]);
568 page_cache_release(page);
569 page_cache_release(page);
570 i++;
571 } 609 }
572 goto out_free; 610 goto out_free;
573 } 611 }
612 pages[index] = page;
574 index++; 613 index++;
575 } 614 }
576 615
@@ -578,6 +617,12 @@ int btrfs_write_out_cache(struct btrfs_root *root,
578 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 617 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
579 0, &cached_state, GFP_NOFS); 618 0, &cached_state, GFP_NOFS);
580 619
620 /*
621 * When searching for pinned extents, we need to start at our start
622 * offset.
623 */
624 start = block_group->key.objectid;
625
581 /* Write out the extent entries */ 626 /* Write out the extent entries */
582 do { 627 do {
583 struct btrfs_free_space_entry *entry; 628 struct btrfs_free_space_entry *entry;
@@ -585,18 +630,25 @@ int btrfs_write_out_cache(struct btrfs_root *root,
585 unsigned long offset = 0; 630 unsigned long offset = 0;
586 unsigned long start_offset = 0; 631 unsigned long start_offset = 0;
587 632
633 next_page = false;
634
588 if (index == 0) { 635 if (index == 0) {
589 start_offset = first_page_offset; 636 start_offset = first_page_offset;
590 offset = start_offset; 637 offset = start_offset;
591 } 638 }
592 639
593 page = find_get_page(inode->i_mapping, index); 640 if (index >= num_pages) {
641 out_of_space = true;
642 break;
643 }
644
645 page = pages[index];
594 646
595 addr = kmap(page); 647 addr = kmap(page);
596 entry = addr + start_offset; 648 entry = addr + start_offset;
597 649
598 memset(addr, 0, PAGE_CACHE_SIZE); 650 memset(addr, 0, PAGE_CACHE_SIZE);
599 while (1) { 651 while (node && !next_page) {
600 struct btrfs_free_space *e; 652 struct btrfs_free_space *e;
601 653
602 e = rb_entry(node, struct btrfs_free_space, offset_index); 654 e = rb_entry(node, struct btrfs_free_space, offset_index);
@@ -612,12 +664,49 @@ int btrfs_write_out_cache(struct btrfs_root *root,
612 entry->type = BTRFS_FREE_SPACE_EXTENT; 664 entry->type = BTRFS_FREE_SPACE_EXTENT;
613 } 665 }
614 node = rb_next(node); 666 node = rb_next(node);
615 if (!node) 667 if (!node && cluster) {
616 break; 668 node = rb_first(&cluster->root);
669 cluster = NULL;
670 }
617 offset += sizeof(struct btrfs_free_space_entry); 671 offset += sizeof(struct btrfs_free_space_entry);
618 if (offset + sizeof(struct btrfs_free_space_entry) >= 672 if (offset + sizeof(struct btrfs_free_space_entry) >=
619 PAGE_CACHE_SIZE) 673 PAGE_CACHE_SIZE)
674 next_page = true;
675 entry++;
676 }
677
678 /*
679 * We want to add any pinned extents to our free space cache
680 * so we don't leak the space
681 */
682 while (!next_page && (start < block_group->key.objectid +
683 block_group->key.offset)) {
684 ret = find_first_extent_bit(unpin, start, &start, &end,
685 EXTENT_DIRTY);
686 if (ret) {
687 ret = 0;
688 break;
689 }
690
691 /* This pinned extent is out of our range */
692 if (start >= block_group->key.objectid +
693 block_group->key.offset)
620 break; 694 break;
695
696 len = block_group->key.objectid +
697 block_group->key.offset - start;
698 len = min(len, end + 1 - start);
699
700 entries++;
701 entry->offset = cpu_to_le64(start);
702 entry->bytes = cpu_to_le64(len);
703 entry->type = BTRFS_FREE_SPACE_EXTENT;
704
705 start = end + 1;
706 offset += sizeof(struct btrfs_free_space_entry);
707 if (offset + sizeof(struct btrfs_free_space_entry) >=
708 PAGE_CACHE_SIZE)
709 next_page = true;
621 entry++; 710 entry++;
622 } 711 }
623 *crc = ~(u32)0; 712 *crc = ~(u32)0;
@@ -630,25 +719,8 @@ int btrfs_write_out_cache(struct btrfs_root *root,
630 719
631 bytes += PAGE_CACHE_SIZE; 720 bytes += PAGE_CACHE_SIZE;
632 721
633 ClearPageChecked(page);
634 set_page_extent_mapped(page);
635 SetPageUptodate(page);
636 set_page_dirty(page);
637
638 /*
639 * We need to release our reference we got for grab_cache_page,
640 * except for the first page which will hold our checksums, we
641 * do that below.
642 */
643 if (index != 0) {
644 unlock_page(page);
645 page_cache_release(page);
646 }
647
648 page_cache_release(page);
649
650 index++; 722 index++;
651 } while (node); 723 } while (node || next_page);
652 724
653 /* Write out the bitmaps */ 725 /* Write out the bitmaps */
654 list_for_each_safe(pos, n, &bitmap_list) { 726 list_for_each_safe(pos, n, &bitmap_list) {
@@ -656,7 +728,11 @@ int btrfs_write_out_cache(struct btrfs_root *root,
656 struct btrfs_free_space *entry = 728 struct btrfs_free_space *entry =
657 list_entry(pos, struct btrfs_free_space, list); 729 list_entry(pos, struct btrfs_free_space, list);
658 730
659 page = find_get_page(inode->i_mapping, index); 731 if (index >= num_pages) {
732 out_of_space = true;
733 break;
734 }
735 page = pages[index];
660 736
661 addr = kmap(page); 737 addr = kmap(page);
662 memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); 738 memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
@@ -667,64 +743,58 @@ int btrfs_write_out_cache(struct btrfs_root *root,
667 crc++; 743 crc++;
668 bytes += PAGE_CACHE_SIZE; 744 bytes += PAGE_CACHE_SIZE;
669 745
670 ClearPageChecked(page);
671 set_page_extent_mapped(page);
672 SetPageUptodate(page);
673 set_page_dirty(page);
674 unlock_page(page);
675 page_cache_release(page);
676 page_cache_release(page);
677 list_del_init(&entry->list); 746 list_del_init(&entry->list);
678 index++; 747 index++;
679 } 748 }
680 749
750 if (out_of_space) {
751 btrfs_drop_pages(pages, num_pages);
752 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
753 i_size_read(inode) - 1, &cached_state,
754 GFP_NOFS);
755 ret = 0;
756 goto out_free;
757 }
758
681 /* Zero out the rest of the pages just to make sure */ 759 /* Zero out the rest of the pages just to make sure */
682 while (index <= last_index) { 760 while (index < num_pages) {
683 void *addr; 761 void *addr;
684 762
685 page = find_get_page(inode->i_mapping, index); 763 page = pages[index];
686
687 addr = kmap(page); 764 addr = kmap(page);
688 memset(addr, 0, PAGE_CACHE_SIZE); 765 memset(addr, 0, PAGE_CACHE_SIZE);
689 kunmap(page); 766 kunmap(page);
690 ClearPageChecked(page);
691 set_page_extent_mapped(page);
692 SetPageUptodate(page);
693 set_page_dirty(page);
694 unlock_page(page);
695 page_cache_release(page);
696 page_cache_release(page);
697 bytes += PAGE_CACHE_SIZE; 767 bytes += PAGE_CACHE_SIZE;
698 index++; 768 index++;
699 } 769 }
700 770
701 btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);
702
703 /* Write the checksums and trans id to the first page */ 771 /* Write the checksums and trans id to the first page */
704 { 772 {
705 void *addr; 773 void *addr;
706 u64 *gen; 774 u64 *gen;
707 775
708 page = find_get_page(inode->i_mapping, 0); 776 page = pages[0];
709 777
710 addr = kmap(page); 778 addr = kmap(page);
711 memcpy(addr, checksums, sizeof(u32) * num_checksums); 779 memcpy(addr, checksums, sizeof(u32) * num_pages);
712 gen = addr + (sizeof(u32) * num_checksums); 780 gen = addr + (sizeof(u32) * num_pages);
713 *gen = trans->transid; 781 *gen = trans->transid;
714 kunmap(page); 782 kunmap(page);
715 ClearPageChecked(page);
716 set_page_extent_mapped(page);
717 SetPageUptodate(page);
718 set_page_dirty(page);
719 unlock_page(page);
720 page_cache_release(page);
721 page_cache_release(page);
722 } 783 }
723 BTRFS_I(inode)->generation = trans->transid;
724 784
785 ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0,
786 bytes, &cached_state);
787 btrfs_drop_pages(pages, num_pages);
725 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 788 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
726 i_size_read(inode) - 1, &cached_state, GFP_NOFS); 789 i_size_read(inode) - 1, &cached_state, GFP_NOFS);
727 790
791 if (ret) {
792 ret = 0;
793 goto out_free;
794 }
795
796 BTRFS_I(inode)->generation = trans->transid;
797
728 filemap_write_and_wait(inode->i_mapping); 798 filemap_write_and_wait(inode->i_mapping);
729 799
730 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 800 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
@@ -775,6 +845,7 @@ out_free:
775 BTRFS_I(inode)->generation = 0; 845 BTRFS_I(inode)->generation = 0;
776 } 846 }
777 kfree(checksums); 847 kfree(checksums);
848 kfree(pages);
778 btrfs_update_inode(trans, root, inode); 849 btrfs_update_inode(trans, root, inode);
779 iput(inode); 850 iput(inode);
780 return ret; 851 return ret;
@@ -1187,7 +1258,7 @@ static void free_bitmap(struct btrfs_block_group_cache *block_group,
1187{ 1258{
1188 unlink_free_space(block_group, bitmap_info); 1259 unlink_free_space(block_group, bitmap_info);
1189 kfree(bitmap_info->bitmap); 1260 kfree(bitmap_info->bitmap);
1190 kfree(bitmap_info); 1261 kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
1191 block_group->total_bitmaps--; 1262 block_group->total_bitmaps--;
1192 recalculate_thresholds(block_group); 1263 recalculate_thresholds(block_group);
1193} 1264}
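
free_bitmap() and the other call sites converted in this file now allocate struct btrfs_free_space from a dedicated slab instead of the general-purpose heap. A sketch of how such a cache is typically created at init time — the flags here are an assumption in line with the other btrfs caches, not taken from this hunk:

	btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space",
			sizeof(struct btrfs_free_space), 0,
			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
	if (!btrfs_free_space_cachep)
		return -ENOMEM;   /* mirror the other btrfs cache inits */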
@@ -1285,9 +1356,22 @@ static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
1285 * If we are below the extents threshold then we can add this as an 1356 * If we are below the extents threshold then we can add this as an
1286 * extent, and don't have to deal with the bitmap 1357 * extent, and don't have to deal with the bitmap
1287 */ 1358 */
1288 if (block_group->free_extents < block_group->extents_thresh && 1359 if (block_group->free_extents < block_group->extents_thresh) {
1289 info->bytes > block_group->sectorsize * 4) 1360 /*
1290 return 0; 1361 * If this block group has some small extents we don't want to
1362 * use up all of our free slots in the cache with them, we want
1363 * to reserve them to larger extents, however if we have plenty
1364 * of cache left then go ahead and add them; no sense in adding
1365 * the overhead of a bitmap if we don't have to.
1366 */
1367 if (info->bytes <= block_group->sectorsize * 4) {
1368 if (block_group->free_extents * 2 <=
1369 block_group->extents_thresh)
1370 return 0;
1371 } else {
1372 return 0;
1373 }
1374 }
1291 1375
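Assuming a 4 KiB sectorsize for illustration, the new policy treats anything at or under 16 KiB as "small" and only keeps it as a plain extent entry while fewer than half the extent slots are in use. Given that we are already below extents_thresh, the decision reduces to this sketch:

	/* Sketch of the admission rule, below the extents threshold. */
	bool small = info->bytes <= block_group->sectorsize * 4;
	bool roomy = block_group->free_extents * 2 <=
		     block_group->extents_thresh;

	if (!small || roomy)
		return 0;    /* keep it as a plain extent entry */
	/* else: fall through and pack the small extent into a bitmap */
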
1292 /* 1376 /*
1293 * some block groups are so tiny they can't be enveloped by a bitmap, so 1377 * some block groups are so tiny they can't be enveloped by a bitmap, so
@@ -1342,8 +1426,8 @@ new_bitmap:
1342 1426
1343 /* no pre-allocated info, allocate a new one */ 1427 /* no pre-allocated info, allocate a new one */
1344 if (!info) { 1428 if (!info) {
1345 info = kzalloc(sizeof(struct btrfs_free_space), 1429 info = kmem_cache_zalloc(btrfs_free_space_cachep,
1346 GFP_NOFS); 1430 GFP_NOFS);
1347 if (!info) { 1431 if (!info) {
1348 spin_lock(&block_group->tree_lock); 1432 spin_lock(&block_group->tree_lock);
1349 ret = -ENOMEM; 1433 ret = -ENOMEM;
@@ -1365,7 +1449,7 @@ out:
1365 if (info) { 1449 if (info) {
1366 if (info->bitmap) 1450 if (info->bitmap)
1367 kfree(info->bitmap); 1451 kfree(info->bitmap);
1368 kfree(info); 1452 kmem_cache_free(btrfs_free_space_cachep, info);
1369 } 1453 }
1370 1454
1371 return ret; 1455 return ret;
@@ -1398,7 +1482,7 @@ bool try_merge_free_space(struct btrfs_block_group_cache *block_group,
1398 else 1482 else
1399 __unlink_free_space(block_group, right_info); 1483 __unlink_free_space(block_group, right_info);
1400 info->bytes += right_info->bytes; 1484 info->bytes += right_info->bytes;
1401 kfree(right_info); 1485 kmem_cache_free(btrfs_free_space_cachep, right_info);
1402 merged = true; 1486 merged = true;
1403 } 1487 }
1404 1488
@@ -1410,7 +1494,7 @@ bool try_merge_free_space(struct btrfs_block_group_cache *block_group,
1410 __unlink_free_space(block_group, left_info); 1494 __unlink_free_space(block_group, left_info);
1411 info->offset = left_info->offset; 1495 info->offset = left_info->offset;
1412 info->bytes += left_info->bytes; 1496 info->bytes += left_info->bytes;
1413 kfree(left_info); 1497 kmem_cache_free(btrfs_free_space_cachep, left_info);
1414 merged = true; 1498 merged = true;
1415 } 1499 }
1416 1500
@@ -1423,7 +1507,7 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
1423 struct btrfs_free_space *info; 1507 struct btrfs_free_space *info;
1424 int ret = 0; 1508 int ret = 0;
1425 1509
1426 info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); 1510 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
1427 if (!info) 1511 if (!info)
1428 return -ENOMEM; 1512 return -ENOMEM;
1429 1513
@@ -1450,7 +1534,7 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
1450link: 1534link:
1451 ret = link_free_space(block_group, info); 1535 ret = link_free_space(block_group, info);
1452 if (ret) 1536 if (ret)
1453 kfree(info); 1537 kmem_cache_free(btrfs_free_space_cachep, info);
1454out: 1538out:
1455 spin_unlock(&block_group->tree_lock); 1539 spin_unlock(&block_group->tree_lock);
1456 1540
@@ -1520,7 +1604,7 @@ again:
1520 kfree(info->bitmap); 1604 kfree(info->bitmap);
1521 block_group->total_bitmaps--; 1605 block_group->total_bitmaps--;
1522 } 1606 }
1523 kfree(info); 1607 kmem_cache_free(btrfs_free_space_cachep, info);
1524 goto out_lock; 1608 goto out_lock;
1525 } 1609 }
1526 1610
@@ -1556,7 +1640,7 @@ again:
1556 /* the hole we're creating ends at the end 1640 /* the hole we're creating ends at the end
1557 * of the info struct, just free the info 1641 * of the info struct, just free the info
1558 */ 1642 */
1559 kfree(info); 1643 kmem_cache_free(btrfs_free_space_cachep, info);
1560 } 1644 }
1561 spin_unlock(&block_group->tree_lock); 1645 spin_unlock(&block_group->tree_lock);
1562 1646
@@ -1629,30 +1713,28 @@ __btrfs_return_cluster_to_free_space(
1629{ 1713{
1630 struct btrfs_free_space *entry; 1714 struct btrfs_free_space *entry;
1631 struct rb_node *node; 1715 struct rb_node *node;
1632 bool bitmap;
1633 1716
1634 spin_lock(&cluster->lock); 1717 spin_lock(&cluster->lock);
1635 if (cluster->block_group != block_group) 1718 if (cluster->block_group != block_group)
1636 goto out; 1719 goto out;
1637 1720
1638 bitmap = cluster->points_to_bitmap;
1639 cluster->block_group = NULL; 1721 cluster->block_group = NULL;
1640 cluster->window_start = 0; 1722 cluster->window_start = 0;
1641 list_del_init(&cluster->block_group_list); 1723 list_del_init(&cluster->block_group_list);
1642 cluster->points_to_bitmap = false;
1643
1644 if (bitmap)
1645 goto out;
1646 1724
1647 node = rb_first(&cluster->root); 1725 node = rb_first(&cluster->root);
1648 while (node) { 1726 while (node) {
1727 bool bitmap;
1728
1649 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1729 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1650 node = rb_next(&entry->offset_index); 1730 node = rb_next(&entry->offset_index);
1651 rb_erase(&entry->offset_index, &cluster->root); 1731 rb_erase(&entry->offset_index, &cluster->root);
1652 BUG_ON(entry->bitmap); 1732
1653 try_merge_free_space(block_group, entry, false); 1733 bitmap = (entry->bitmap != NULL);
1734 if (!bitmap)
1735 try_merge_free_space(block_group, entry, false);
1654 tree_insert_offset(&block_group->free_space_offset, 1736 tree_insert_offset(&block_group->free_space_offset,
1655 entry->offset, &entry->offset_index, 0); 1737 entry->offset, &entry->offset_index, bitmap);
1656 } 1738 }
1657 cluster->root = RB_ROOT; 1739 cluster->root = RB_ROOT;
1658 1740
@@ -1689,7 +1771,7 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
1689 unlink_free_space(block_group, info); 1771 unlink_free_space(block_group, info);
1690 if (info->bitmap) 1772 if (info->bitmap)
1691 kfree(info->bitmap); 1773 kfree(info->bitmap);
1692 kfree(info); 1774 kmem_cache_free(btrfs_free_space_cachep, info);
1693 if (need_resched()) { 1775 if (need_resched()) {
1694 spin_unlock(&block_group->tree_lock); 1776 spin_unlock(&block_group->tree_lock);
1695 cond_resched(); 1777 cond_resched();
@@ -1722,7 +1804,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
1722 entry->offset += bytes; 1804 entry->offset += bytes;
1723 entry->bytes -= bytes; 1805 entry->bytes -= bytes;
1724 if (!entry->bytes) 1806 if (!entry->bytes)
1725 kfree(entry); 1807 kmem_cache_free(btrfs_free_space_cachep, entry);
1726 else 1808 else
1727 link_free_space(block_group, entry); 1809 link_free_space(block_group, entry);
1728 } 1810 }
@@ -1775,50 +1857,24 @@ int btrfs_return_cluster_to_free_space(
1775 1857
1776static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, 1858static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
1777 struct btrfs_free_cluster *cluster, 1859 struct btrfs_free_cluster *cluster,
1860 struct btrfs_free_space *entry,
1778 u64 bytes, u64 min_start) 1861 u64 bytes, u64 min_start)
1779{ 1862{
1780 struct btrfs_free_space *entry;
1781 int err; 1863 int err;
1782 u64 search_start = cluster->window_start; 1864 u64 search_start = cluster->window_start;
1783 u64 search_bytes = bytes; 1865 u64 search_bytes = bytes;
1784 u64 ret = 0; 1866 u64 ret = 0;
1785 1867
1786 spin_lock(&block_group->tree_lock);
1787 spin_lock(&cluster->lock);
1788
1789 if (!cluster->points_to_bitmap)
1790 goto out;
1791
1792 if (cluster->block_group != block_group)
1793 goto out;
1794
1795 /*
1796 * search_start is the beginning of the bitmap, but at some point it may
1797 * be a good idea to point to the actual start of the free area in the
1798 * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only
1799 * to 1 to make sure we get the bitmap entry
1800 */
1801 entry = tree_search_offset(block_group,
1802 offset_to_bitmap(block_group, search_start),
1803 1, 0);
1804 if (!entry || !entry->bitmap)
1805 goto out;
1806
1807 search_start = min_start; 1868 search_start = min_start;
1808 search_bytes = bytes; 1869 search_bytes = bytes;
1809 1870
1810 err = search_bitmap(block_group, entry, &search_start, 1871 err = search_bitmap(block_group, entry, &search_start,
1811 &search_bytes); 1872 &search_bytes);
1812 if (err) 1873 if (err)
1813 goto out; 1874 return 0;
1814 1875
1815 ret = search_start; 1876 ret = search_start;
1816 bitmap_clear_bits(block_group, entry, ret, bytes); 1877 bitmap_clear_bits(block_group, entry, ret, bytes);
1817 if (entry->bytes == 0)
1818 free_bitmap(block_group, entry);
1819out:
1820 spin_unlock(&cluster->lock);
1821 spin_unlock(&block_group->tree_lock);
1822 1878
1823 return ret; 1879 return ret;
1824} 1880}
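
With the tree lookup and locking removed, the function is now a thin wrapper around the bitmap search. Its use of search_bitmap(), restated as a sketch with the in/out semantics inferred from this call site:

	u64 start = min_start;   /* in: floor; out: offset found   */
	u64 size  = bytes;       /* in: needed; out: bytes granted */

	/* 0 on success; nonzero when no free run of 'size' bytes
	 * exists at or after 'start' in this bitmap entry. */
	if (search_bitmap(block_group, entry, &start, &size))
		return 0;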
@@ -1836,10 +1892,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
1836 struct rb_node *node; 1892 struct rb_node *node;
1837 u64 ret = 0; 1893 u64 ret = 0;
1838 1894
1839 if (cluster->points_to_bitmap)
1840 return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
1841 min_start);
1842
1843 spin_lock(&cluster->lock); 1895 spin_lock(&cluster->lock);
1844 if (bytes > cluster->max_size) 1896 if (bytes > cluster->max_size)
1845 goto out; 1897 goto out;
@@ -1852,9 +1904,9 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
1852 goto out; 1904 goto out;
1853 1905
1854 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1906 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1855
1856 while(1) { 1907 while(1) {
1857 if (entry->bytes < bytes || entry->offset < min_start) { 1908 if (entry->bytes < bytes ||
1909 (!entry->bitmap && entry->offset < min_start)) {
1858 struct rb_node *node; 1910 struct rb_node *node;
1859 1911
1860 node = rb_next(&entry->offset_index); 1912 node = rb_next(&entry->offset_index);
@@ -1864,10 +1916,27 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
1864 offset_index); 1916 offset_index);
1865 continue; 1917 continue;
1866 } 1918 }
1867 ret = entry->offset;
1868 1919
1869 entry->offset += bytes; 1920 if (entry->bitmap) {
1870 entry->bytes -= bytes; 1921 ret = btrfs_alloc_from_bitmap(block_group,
1922 cluster, entry, bytes,
1923 min_start);
1924 if (ret == 0) {
1925 struct rb_node *node;
1926 node = rb_next(&entry->offset_index);
1927 if (!node)
1928 break;
1929 entry = rb_entry(node, struct btrfs_free_space,
1930 offset_index);
1931 continue;
1932 }
1933 } else {
1934
1934
1935 ret = entry->offset;
1936
1937 entry->offset += bytes;
1938 entry->bytes -= bytes;
1939 }
1871 1940
1872 if (entry->bytes == 0) 1941 if (entry->bytes == 0)
1873 rb_erase(&entry->offset_index, &cluster->root); 1942 rb_erase(&entry->offset_index, &cluster->root);
@@ -1884,7 +1953,12 @@ out:
1884 block_group->free_space -= bytes; 1953 block_group->free_space -= bytes;
1885 if (entry->bytes == 0) { 1954 if (entry->bytes == 0) {
1886 block_group->free_extents--; 1955 block_group->free_extents--;
1887 kfree(entry); 1956 if (entry->bitmap) {
1957 kfree(entry->bitmap);
1958 block_group->total_bitmaps--;
1959 recalculate_thresholds(block_group);
1960 }
1961 kmem_cache_free(btrfs_free_space_cachep, entry);
1888 } 1962 }
1889 1963
1890 spin_unlock(&block_group->tree_lock); 1964 spin_unlock(&block_group->tree_lock);
@@ -1904,12 +1978,13 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
1904 unsigned long found_bits; 1978 unsigned long found_bits;
1905 unsigned long start = 0; 1979 unsigned long start = 0;
1906 unsigned long total_found = 0; 1980 unsigned long total_found = 0;
1981 int ret;
1907 bool found = false; 1982 bool found = false;
1908 1983
1909 i = offset_to_bit(entry->offset, block_group->sectorsize, 1984 i = offset_to_bit(entry->offset, block_group->sectorsize,
1910 max_t(u64, offset, entry->offset)); 1985 max_t(u64, offset, entry->offset));
1911 search_bits = bytes_to_bits(min_bytes, block_group->sectorsize); 1986 search_bits = bytes_to_bits(bytes, block_group->sectorsize);
1912 total_bits = bytes_to_bits(bytes, block_group->sectorsize); 1987 total_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
1913 1988
1914again: 1989again:
1915 found_bits = 0; 1990 found_bits = 0;
@@ -1926,7 +2001,7 @@ again:
1926 } 2001 }
1927 2002
1928 if (!found_bits) 2003 if (!found_bits)
1929 return -1; 2004 return -ENOSPC;
1930 2005
1931 if (!found) { 2006 if (!found) {
1932 start = i; 2007 start = i;
@@ -1950,189 +2025,208 @@ again:
1950 2025
1951 cluster->window_start = start * block_group->sectorsize + 2026 cluster->window_start = start * block_group->sectorsize +
1952 entry->offset; 2027 entry->offset;
1953 cluster->points_to_bitmap = true; 2028 rb_erase(&entry->offset_index, &block_group->free_space_offset);
2029 ret = tree_insert_offset(&cluster->root, entry->offset,
2030 &entry->offset_index, 1);
2031 BUG_ON(ret);
1954 2032
1955 return 0; 2033 return 0;
1956} 2034}
1957 2035
1958/* 2036/*
1959 * here we try to find a cluster of blocks in a block group. The goal 2037 * This searches the block group for just extents to fill the cluster with.
1960 * is to find at least bytes free and up to empty_size + bytes free.
1961 * We might not find them all in one contiguous area.
1962 *
1963 * returns zero and sets up cluster if things worked out, otherwise
1964 * it returns -enospc
1965 */ 2038 */
1966int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, 2039static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
1967 struct btrfs_root *root, 2040 struct btrfs_free_cluster *cluster,
1968 struct btrfs_block_group_cache *block_group, 2041 u64 offset, u64 bytes, u64 min_bytes)
1969 struct btrfs_free_cluster *cluster,
1970 u64 offset, u64 bytes, u64 empty_size)
1971{ 2042{
2043 struct btrfs_free_space *first = NULL;
1972 struct btrfs_free_space *entry = NULL; 2044 struct btrfs_free_space *entry = NULL;
2045 struct btrfs_free_space *prev = NULL;
2046 struct btrfs_free_space *last;
1973 struct rb_node *node; 2047 struct rb_node *node;
1974 struct btrfs_free_space *next;
1975 struct btrfs_free_space *last = NULL;
1976 u64 min_bytes;
1977 u64 window_start; 2048 u64 window_start;
1978 u64 window_free; 2049 u64 window_free;
1979 u64 max_extent = 0; 2050 u64 max_extent;
1980 bool found_bitmap = false; 2051 u64 max_gap = 128 * 1024;
1981 int ret;
1982 2052
1983 /* for metadata, allow allocates with more holes */ 2053 entry = tree_search_offset(block_group, offset, 0, 1);
1984 if (btrfs_test_opt(root, SSD_SPREAD)) { 2054 if (!entry)
1985 min_bytes = bytes + empty_size; 2055 return -ENOSPC;
1986 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
1987 /*
1988 * we want to do larger allocations when we are
1989 * flushing out the delayed refs, it helps prevent
1990 * making more work as we go along.
1991 */
1992 if (trans->transaction->delayed_refs.flushing)
1993 min_bytes = max(bytes, (bytes + empty_size) >> 1);
1994 else
1995 min_bytes = max(bytes, (bytes + empty_size) >> 4);
1996 } else
1997 min_bytes = max(bytes, (bytes + empty_size) >> 2);
1998
1999 spin_lock(&block_group->tree_lock);
2000 spin_lock(&cluster->lock);
2001
2002 /* someone already found a cluster, hooray */
2003 if (cluster->block_group) {
2004 ret = 0;
2005 goto out;
2006 }
2007again:
2008 entry = tree_search_offset(block_group, offset, found_bitmap, 1);
2009 if (!entry) {
2010 ret = -ENOSPC;
2011 goto out;
2012 }
2013 2056
2014 /* 2057 /*
2015 * If found_bitmap is true, we exhausted our search for extent entries, 2058 * We don't want bitmaps, so just move along until we find a normal
2016 * and we just want to search all of the bitmaps that we can find, and 2059 * extent entry.
2017 * ignore any extent entries we find.
2018 */ 2060 */
2019 while (entry->bitmap || found_bitmap || 2061 while (entry->bitmap) {
2020 (!entry->bitmap && entry->bytes < min_bytes)) { 2062 node = rb_next(&entry->offset_index);
2021 struct rb_node *node = rb_next(&entry->offset_index); 2063 if (!node)
2022 2064 return -ENOSPC;
2023 if (entry->bitmap && entry->bytes > bytes + empty_size) {
2024 ret = btrfs_bitmap_cluster(block_group, entry, cluster,
2025 offset, bytes + empty_size,
2026 min_bytes);
2027 if (!ret)
2028 goto got_it;
2029 }
2030
2031 if (!node) {
2032 ret = -ENOSPC;
2033 goto out;
2034 }
2035 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2065 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2036 } 2066 }
2037 2067
2038 /*
2039 * We already searched all the extent entries from the passed in offset
2040 * to the end and didn't find enough space for the cluster, and we also
2041 * didn't find any bitmaps that met our criteria, just go ahead and exit
2042 */
2043 if (found_bitmap) {
2044 ret = -ENOSPC;
2045 goto out;
2046 }
2047
2048 cluster->points_to_bitmap = false;
2049 window_start = entry->offset; 2068 window_start = entry->offset;
2050 window_free = entry->bytes; 2069 window_free = entry->bytes;
2051 last = entry;
2052 max_extent = entry->bytes; 2070 max_extent = entry->bytes;
2071 first = entry;
2072 last = entry;
2073 prev = entry;
2053 2074
2054 while (1) { 2075 while (window_free <= min_bytes) {
2055 /* out window is just right, lets fill it */ 2076 node = rb_next(&entry->offset_index);
2056 if (window_free >= bytes + empty_size) 2077 if (!node)
2057 break; 2078 return -ENOSPC;
2058 2079 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2059 node = rb_next(&last->offset_index);
2060 if (!node) {
2061 if (found_bitmap)
2062 goto again;
2063 ret = -ENOSPC;
2064 goto out;
2065 }
2066 next = rb_entry(node, struct btrfs_free_space, offset_index);
2067 2080
2068 /* 2081 if (entry->bitmap)
2069 * we found a bitmap, so if this search doesn't result in a
2070 * cluster, we know to go and search again for the bitmaps and
2071 * start looking for space there
2072 */
2073 if (next->bitmap) {
2074 if (!found_bitmap)
2075 offset = next->offset;
2076 found_bitmap = true;
2077 last = next;
2078 continue; 2082 continue;
2079 }
2080
2081 /* 2083 /*
2082 * we haven't filled the empty size and the window is 2084 * we haven't filled the empty size and the window is
2083 * very large. reset and try again 2085 * very large. reset and try again
2084 */ 2086 */
2085 if (next->offset - (last->offset + last->bytes) > 128 * 1024 || 2087 if (entry->offset - (prev->offset + prev->bytes) > max_gap ||
2086 next->offset - window_start > (bytes + empty_size) * 2) { 2088 entry->offset - window_start > (min_bytes * 2)) {
2087 entry = next; 2089 first = entry;
2088 window_start = entry->offset; 2090 window_start = entry->offset;
2089 window_free = entry->bytes; 2091 window_free = entry->bytes;
2090 last = entry; 2092 last = entry;
2091 max_extent = entry->bytes; 2093 max_extent = entry->bytes;
2092 } else { 2094 } else {
2093 last = next; 2095 last = entry;
2094 window_free += next->bytes; 2096 window_free += entry->bytes;
2095 if (entry->bytes > max_extent) 2097 if (entry->bytes > max_extent)
2096 max_extent = entry->bytes; 2098 max_extent = entry->bytes;
2097 } 2099 }
2100 prev = entry;
2098 } 2101 }
2099 2102
2100 cluster->window_start = entry->offset; 2103 cluster->window_start = first->offset;
2104
2105 node = &first->offset_index;
2101 2106
2102 /* 2107 /*
2103 * now we've found our entries, pull them out of the free space 2108 * now we've found our entries, pull them out of the free space
2104 * cache and put them into the cluster rbtree 2109 * cache and put them into the cluster rbtree
2105 *
2106 * The cluster includes an rbtree, but only uses the offset index
2107 * of each free space cache entry.
2108 */ 2110 */
2109 while (1) { 2111 do {
2112 int ret;
2113
2114 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2110 node = rb_next(&entry->offset_index); 2115 node = rb_next(&entry->offset_index);
2111 if (entry->bitmap && node) { 2116 if (entry->bitmap)
2112 entry = rb_entry(node, struct btrfs_free_space,
2113 offset_index);
2114 continue; 2117 continue;
2115 } else if (entry->bitmap && !node) {
2116 break;
2117 }
2118 2118
2119 rb_erase(&entry->offset_index, &block_group->free_space_offset); 2119 rb_erase(&entry->offset_index, &block_group->free_space_offset);
2120 ret = tree_insert_offset(&cluster->root, entry->offset, 2120 ret = tree_insert_offset(&cluster->root, entry->offset,
2121 &entry->offset_index, 0); 2121 &entry->offset_index, 0);
2122 BUG_ON(ret); 2122 BUG_ON(ret);
2123 } while (node && entry != last);
2123 2124
2124 if (!node || entry == last) 2125 cluster->max_size = max_extent;
2125 break;
2126 2126
2127 return 0;
2128}
2129
2130/*
2131 * This specifically looks for bitmaps that may work in the cluster, we assume
2132 * that we have already failed to find extents that will work.
2133 */
2134static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2135 struct btrfs_free_cluster *cluster,
2136 u64 offset, u64 bytes, u64 min_bytes)
2137{
2138 struct btrfs_free_space *entry;
2139 struct rb_node *node;
2140 int ret = -ENOSPC;
2141
2142 if (block_group->total_bitmaps == 0)
2143 return -ENOSPC;
2144
2145 entry = tree_search_offset(block_group,
2146 offset_to_bitmap(block_group, offset),
2147 0, 1);
2148 if (!entry)
2149 return -ENOSPC;
2150
2151 node = &entry->offset_index;
2152 do {
2127 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2153 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2154 node = rb_next(&entry->offset_index);
2155 if (!entry->bitmap)
2156 continue;
2157 if (entry->bytes < min_bytes)
2158 continue;
2159 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2160 bytes, min_bytes);
2161 } while (ret && node);
2162
2163 return ret;
2164}
2165
2166/*
2167 * here we try to find a cluster of blocks in a block group. The goal
2168 * is to find at least bytes free and up to empty_size + bytes free.
2169 * We might not find them all in one contiguous area.
2170 *
2171 * returns zero and sets up cluster if things worked out, otherwise
2172 * it returns -ENOSPC
2173 */
2174int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2175 struct btrfs_root *root,
2176 struct btrfs_block_group_cache *block_group,
2177 struct btrfs_free_cluster *cluster,
2178 u64 offset, u64 bytes, u64 empty_size)
2179{
2180 u64 min_bytes;
2181 int ret;
2182
2183 /* for metadata, allow allocates with more holes */
2184 if (btrfs_test_opt(root, SSD_SPREAD)) {
2185 min_bytes = bytes + empty_size;
2186 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
2187 /*
2188 * we want to do larger allocations when we are
2189 * flushing out the delayed refs, it helps prevent
2190 * making more work as we go along.
2191 */
2192 if (trans->transaction->delayed_refs.flushing)
2193 min_bytes = max(bytes, (bytes + empty_size) >> 1);
2194 else
2195 min_bytes = max(bytes, (bytes + empty_size) >> 4);
2196 } else
2197 min_bytes = max(bytes, (bytes + empty_size) >> 2);
2198
2199 spin_lock(&block_group->tree_lock);
2200
2201 /*
2202 * If we know we don't have enough space to make a cluster, don't even
2203 * bother doing all the work to try and find one.
2204 */
2205 if (block_group->free_space < min_bytes) {
2206 spin_unlock(&block_group->tree_lock);
2207 return -ENOSPC;
2128 } 2208 }
2129 2209
2130 cluster->max_size = max_extent; 2210 spin_lock(&cluster->lock);
2131got_it: 2211
2132 ret = 0; 2212 /* someone already found a cluster, hooray */
2133 atomic_inc(&block_group->count); 2213 if (cluster->block_group) {
2134 list_add_tail(&cluster->block_group_list, &block_group->cluster_list); 2214 ret = 0;
2135 cluster->block_group = block_group; 2215 goto out;
2216 }
2217
2218 ret = setup_cluster_no_bitmap(block_group, cluster, offset, bytes,
2219 min_bytes);
2220 if (ret)
2221 ret = setup_cluster_bitmap(block_group, cluster, offset,
2222 bytes, min_bytes);
2223
2224 if (!ret) {
2225 atomic_inc(&block_group->count);
2226 list_add_tail(&cluster->block_group_list,
2227 &block_group->cluster_list);
2228 cluster->block_group = block_group;
2229 }
2136out: 2230out:
2137 spin_unlock(&cluster->lock); 2231 spin_unlock(&cluster->lock);
2138 spin_unlock(&block_group->tree_lock); 2232 spin_unlock(&block_group->tree_lock);
@@ -2149,8 +2243,99 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
2149 spin_lock_init(&cluster->refill_lock); 2243 spin_lock_init(&cluster->refill_lock);
2150 cluster->root = RB_ROOT; 2244 cluster->root = RB_ROOT;
2151 cluster->max_size = 0; 2245 cluster->max_size = 0;
2152 cluster->points_to_bitmap = false;
2153 INIT_LIST_HEAD(&cluster->block_group_list); 2246 INIT_LIST_HEAD(&cluster->block_group_list);
2154 cluster->block_group = NULL; 2247 cluster->block_group = NULL;
2155} 2248}
2156 2249
2250int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2251 u64 *trimmed, u64 start, u64 end, u64 minlen)
2252{
2253 struct btrfs_free_space *entry = NULL;
2254 struct btrfs_fs_info *fs_info = block_group->fs_info;
2255 u64 bytes = 0;
2256 u64 actually_trimmed;
2257 int ret = 0;
2258
2259 *trimmed = 0;
2260
2261 while (start < end) {
2262 spin_lock(&block_group->tree_lock);
2263
2264 if (block_group->free_space < minlen) {
2265 spin_unlock(&block_group->tree_lock);
2266 break;
2267 }
2268
2269 entry = tree_search_offset(block_group, start, 0, 1);
2270 if (!entry)
2271 entry = tree_search_offset(block_group,
2272 offset_to_bitmap(block_group,
2273 start),
2274 1, 1);
2275
2276 if (!entry || entry->offset >= end) {
2277 spin_unlock(&block_group->tree_lock);
2278 break;
2279 }
2280
2281 if (entry->bitmap) {
2282 ret = search_bitmap(block_group, entry, &start, &bytes);
2283 if (!ret) {
2284 if (start >= end) {
2285 spin_unlock(&block_group->tree_lock);
2286 break;
2287 }
2288 bytes = min(bytes, end - start);
2289 bitmap_clear_bits(block_group, entry,
2290 start, bytes);
2291 if (entry->bytes == 0)
2292 free_bitmap(block_group, entry);
2293 } else {
2294 start = entry->offset + BITS_PER_BITMAP *
2295 block_group->sectorsize;
2296 spin_unlock(&block_group->tree_lock);
2297 ret = 0;
2298 continue;
2299 }
2300 } else {
2301 start = entry->offset;
2302 bytes = min(entry->bytes, end - start);
2303 unlink_free_space(block_group, entry);
2304 kfree(entry);
2305 }
2306
2307 spin_unlock(&block_group->tree_lock);
2308
2309 if (bytes >= minlen) {
2310 int update_ret;
2311 update_ret = btrfs_update_reserved_bytes(block_group,
2312 bytes, 1, 1);
2313
2314 ret = btrfs_error_discard_extent(fs_info->extent_root,
2315 start,
2316 bytes,
2317 &actually_trimmed);
2318
2319 btrfs_add_free_space(block_group,
2320 start, bytes);
2321 if (!update_ret)
2322 btrfs_update_reserved_bytes(block_group,
2323 bytes, 0, 1);
2324
2325 if (ret)
2326 break;
2327 *trimmed += actually_trimmed;
2328 }
2329 start += bytes;
2330 bytes = 0;
2331
2332 if (fatal_signal_pending(current)) {
2333 ret = -ERESTARTSYS;
2334 break;
2335 }
2336
2337 cond_resched();
2338 }
2339
2340 return ret;
2341}
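
btrfs_trim_block_group() does the per-group work; a hypothetical outer loop (the iteration helpers here are placeholders — the real FITRIM plumbing is added elsewhere in this series) would look roughly like:

	struct btrfs_block_group_cache *cache;
	u64 trimmed = 0, group_trimmed;
	int ret = 0;

	/* Placeholder iteration helpers; not part of this diff. */
	for (cache = first_block_group(fs_info); cache;
	     cache = next_block_group(cache)) {
		ret = btrfs_trim_block_group(cache, &group_trimmed,
					     start, end, minlen);
		if (ret)
			break;
		trimmed += group_trimmed;   /* total for the FITRIM reply */
	}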
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index e49ca5c321b5..65c3b935289f 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -68,4 +68,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
68int btrfs_return_cluster_to_free_space( 68int btrfs_return_cluster_to_free_space(
69 struct btrfs_block_group_cache *block_group, 69 struct btrfs_block_group_cache *block_group,
70 struct btrfs_free_cluster *cluster); 70 struct btrfs_free_cluster *cluster);
71int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
72 u64 *trimmed, u64 start, u64 end, u64 minlen);
71#endif 73#endif
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index c56eb5909172..c05a08f4c411 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -30,7 +30,8 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid)
30 int slot; 30 int slot;
31 31
32 path = btrfs_alloc_path(); 32 path = btrfs_alloc_path();
33 BUG_ON(!path); 33 if (!path)
34 return -ENOMEM;
34 35
35 search_key.objectid = BTRFS_LAST_FREE_OBJECTID; 36 search_key.objectid = BTRFS_LAST_FREE_OBJECTID;
36 search_key.type = -1; 37 search_key.type = -1;
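With the BUG_ON() gone, allocation failure now propagates as -ENOMEM, and callers are expected to check the return value along these lines (sketch only):

	ret = btrfs_find_highest_inode(root, &objectid);
	if (ret)
		return ret;   /* was a guaranteed crash, now recoverable */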
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 512c3d1da083..fcd66b6a8086 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -50,6 +50,7 @@
50#include "tree-log.h" 50#include "tree-log.h"
51#include "compression.h" 51#include "compression.h"
52#include "locking.h" 52#include "locking.h"
53#include "free-space-cache.h"
53 54
54struct btrfs_iget_args { 55struct btrfs_iget_args {
55 u64 ino; 56 u64 ino;
@@ -70,6 +71,7 @@ static struct kmem_cache *btrfs_inode_cachep;
70struct kmem_cache *btrfs_trans_handle_cachep; 71struct kmem_cache *btrfs_trans_handle_cachep;
71struct kmem_cache *btrfs_transaction_cachep; 72struct kmem_cache *btrfs_transaction_cachep;
72struct kmem_cache *btrfs_path_cachep; 73struct kmem_cache *btrfs_path_cachep;
74struct kmem_cache *btrfs_free_space_cachep;
73 75
74#define S_SHIFT 12 76#define S_SHIFT 12
75static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { 77static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
@@ -82,7 +84,8 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
82 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, 84 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
83}; 85};
84 86
85static void btrfs_truncate(struct inode *inode); 87static int btrfs_setsize(struct inode *inode, loff_t newsize);
88static int btrfs_truncate(struct inode *inode);
86static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end); 89static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end);
87static noinline int cow_file_range(struct inode *inode, 90static noinline int cow_file_range(struct inode *inode,
88 struct page *locked_page, 91 struct page *locked_page,
@@ -109,6 +112,7 @@ static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
109static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, 112static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
110 struct btrfs_root *root, struct inode *inode, 113 struct btrfs_root *root, struct inode *inode,
111 u64 start, size_t size, size_t compressed_size, 114 u64 start, size_t size, size_t compressed_size,
115 int compress_type,
112 struct page **compressed_pages) 116 struct page **compressed_pages)
113{ 117{
114 struct btrfs_key key; 118 struct btrfs_key key;
@@ -123,12 +127,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
123 size_t cur_size = size; 127 size_t cur_size = size;
124 size_t datasize; 128 size_t datasize;
125 unsigned long offset; 129 unsigned long offset;
126 int compress_type = BTRFS_COMPRESS_NONE;
127 130
128 if (compressed_size && compressed_pages) { 131 if (compressed_size && compressed_pages)
129 compress_type = root->fs_info->compress_type;
130 cur_size = compressed_size; 132 cur_size = compressed_size;
131 }
132 133
133 path = btrfs_alloc_path(); 134 path = btrfs_alloc_path();
134 if (!path) 135 if (!path)
@@ -218,7 +219,7 @@ fail:
218static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, 219static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
219 struct btrfs_root *root, 220 struct btrfs_root *root,
220 struct inode *inode, u64 start, u64 end, 221 struct inode *inode, u64 start, u64 end,
221 size_t compressed_size, 222 size_t compressed_size, int compress_type,
222 struct page **compressed_pages) 223 struct page **compressed_pages)
223{ 224{
224 u64 isize = i_size_read(inode); 225 u64 isize = i_size_read(inode);
@@ -251,7 +252,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
251 inline_len = min_t(u64, isize, actual_end); 252 inline_len = min_t(u64, isize, actual_end);
252 ret = insert_inline_extent(trans, root, inode, start, 253 ret = insert_inline_extent(trans, root, inode, start,
253 inline_len, compressed_size, 254 inline_len, compressed_size,
254 compressed_pages); 255 compress_type, compressed_pages);
255 BUG_ON(ret); 256 BUG_ON(ret);
256 btrfs_delalloc_release_metadata(inode, end + 1 - start); 257 btrfs_delalloc_release_metadata(inode, end + 1 - start);
257 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); 258 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
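The compress_type parameter threaded through here replaces the old read of root->fs_info->compress_type inside insert_inline_extent(), so per-file compression choices survive down the call chain. For reference, a sketch of the values it carries (as defined in the LZO-era ctree.h; sketched from memory, not quoted from this diff):

	enum btrfs_compression_type {
		BTRFS_COMPRESS_NONE = 0,   /* uncompressed inline extent */
		BTRFS_COMPRESS_ZLIB = 1,
		BTRFS_COMPRESS_LZO  = 2,
	};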
@@ -288,6 +289,7 @@ static noinline int add_async_extent(struct async_cow *cow,
288 struct async_extent *async_extent; 289 struct async_extent *async_extent;
289 290
290 async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS); 291 async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS);
292 BUG_ON(!async_extent);
291 async_extent->start = start; 293 async_extent->start = start;
292 async_extent->ram_size = ram_size; 294 async_extent->ram_size = ram_size;
293 async_extent->compressed_size = compressed_size; 295 async_extent->compressed_size = compressed_size;
@@ -382,9 +384,11 @@ again:
382 */ 384 */
383 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) && 385 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) &&
384 (btrfs_test_opt(root, COMPRESS) || 386 (btrfs_test_opt(root, COMPRESS) ||
385 (BTRFS_I(inode)->force_compress))) { 387 (BTRFS_I(inode)->force_compress) ||
388 (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) {
386 WARN_ON(pages); 389 WARN_ON(pages);
387 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); 390 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
391 BUG_ON(!pages);
388 392
389 if (BTRFS_I(inode)->force_compress) 393 if (BTRFS_I(inode)->force_compress)
390 compress_type = BTRFS_I(inode)->force_compress; 394 compress_type = BTRFS_I(inode)->force_compress;
@@ -427,12 +431,13 @@ again:
427 * to make an uncompressed inline extent. 431 * to make an uncompressed inline extent.
428 */ 432 */
429 ret = cow_file_range_inline(trans, root, inode, 433 ret = cow_file_range_inline(trans, root, inode,
430 start, end, 0, NULL); 434 start, end, 0, 0, NULL);
431 } else { 435 } else {
432 /* try making a compressed inline extent */ 436 /* try making a compressed inline extent */
433 ret = cow_file_range_inline(trans, root, inode, 437 ret = cow_file_range_inline(trans, root, inode,
434 start, end, 438 start, end,
435 total_compressed, pages); 439 total_compressed,
440 compress_type, pages);
436 } 441 }
437 if (ret == 0) { 442 if (ret == 0) {
438 /* 443 /*
@@ -786,7 +791,7 @@ static noinline int cow_file_range(struct inode *inode,
786 if (start == 0) { 791 if (start == 0) {
787 /* lets try to make an inline extent */ 792 /* lets try to make an inline extent */
788 ret = cow_file_range_inline(trans, root, inode, 793 ret = cow_file_range_inline(trans, root, inode,
789 start, end, 0, NULL); 794 start, end, 0, 0, NULL);
790 if (ret == 0) { 795 if (ret == 0) {
791 extent_clear_unlock_delalloc(inode, 796 extent_clear_unlock_delalloc(inode,
792 &BTRFS_I(inode)->io_tree, 797 &BTRFS_I(inode)->io_tree,
@@ -1254,7 +1259,8 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1254 ret = run_delalloc_nocow(inode, locked_page, start, end, 1259 ret = run_delalloc_nocow(inode, locked_page, start, end,
1255 page_started, 0, nr_written); 1260 page_started, 0, nr_written);
1256 else if (!btrfs_test_opt(root, COMPRESS) && 1261 else if (!btrfs_test_opt(root, COMPRESS) &&
1257 !(BTRFS_I(inode)->force_compress)) 1262 !(BTRFS_I(inode)->force_compress) &&
1263 !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))
1258 ret = cow_file_range(inode, locked_page, start, end, 1264 ret = cow_file_range(inode, locked_page, start, end,
1259 page_started, nr_written, 1); 1265 page_started, nr_written, 1);
1260 else 1266 else
@@ -1461,8 +1467,11 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
1461 if (bio_flags & EXTENT_BIO_COMPRESSED) { 1467 if (bio_flags & EXTENT_BIO_COMPRESSED) {
1462 return btrfs_submit_compressed_read(inode, bio, 1468 return btrfs_submit_compressed_read(inode, bio,
1463 mirror_num, bio_flags); 1469 mirror_num, bio_flags);
1464 } else if (!skip_sum) 1470 } else if (!skip_sum) {
1465 btrfs_lookup_bio_sums(root, inode, bio, NULL); 1471 ret = btrfs_lookup_bio_sums(root, inode, bio, NULL);
1472 if (ret)
1473 return ret;
1474 }
1466 goto mapit; 1475 goto mapit;
1467 } else if (!skip_sum) { 1476 } else if (!skip_sum) {
1468 /* csum items have already been cloned */ 1477 /* csum items have already been cloned */
@@ -1761,9 +1770,12 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1761 add_pending_csums(trans, inode, ordered_extent->file_offset, 1770 add_pending_csums(trans, inode, ordered_extent->file_offset,
1762 &ordered_extent->list); 1771 &ordered_extent->list);
1763 1772
1764 btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1773 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1765 ret = btrfs_update_inode(trans, root, inode); 1774 if (!ret) {
1766 BUG_ON(ret); 1775 ret = btrfs_update_inode(trans, root, inode);
1776 BUG_ON(ret);
1777 }
1778 ret = 0;
1767out: 1779out:
1768 if (nolock) { 1780 if (nolock) {
1769 if (trans) 1781 if (trans)
@@ -1785,6 +1797,8 @@ out:
1785static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, 1797static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
1786 struct extent_state *state, int uptodate) 1798 struct extent_state *state, int uptodate)
1787{ 1799{
1800 trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
1801
1788 ClearPagePrivate2(page); 1802 ClearPagePrivate2(page);
1789 return btrfs_finish_ordered_io(page->mapping->host, start, end); 1803 return btrfs_finish_ordered_io(page->mapping->host, start, end);
1790} 1804}
@@ -1895,10 +1909,10 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
1895 else 1909 else
1896 rw = READ; 1910 rw = READ;
1897 1911
1898 BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, 1912 ret = BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio,
1899 failrec->last_mirror, 1913 failrec->last_mirror,
1900 failrec->bio_flags, 0); 1914 failrec->bio_flags, 0);
1901 return 0; 1915 return ret;
1902} 1916}
1903 1917
1904/* 1918/*
@@ -2210,8 +2224,6 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
2210 insert = 1; 2224 insert = 1;
2211#endif 2225#endif
2212 insert = 1; 2226 insert = 1;
2213 } else {
2214 WARN_ON(!BTRFS_I(inode)->orphan_meta_reserved);
2215 } 2227 }
2216 2228
2217 if (!BTRFS_I(inode)->orphan_meta_reserved) { 2229 if (!BTRFS_I(inode)->orphan_meta_reserved) {
@@ -2282,7 +2294,7 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
2282 * this cleans up any orphans that may be left on the list from the last use 2294 * this cleans up any orphans that may be left on the list from the last use
2283 * of this root. 2295 * of this root.
2284 */ 2296 */
2285void btrfs_orphan_cleanup(struct btrfs_root *root) 2297int btrfs_orphan_cleanup(struct btrfs_root *root)
2286{ 2298{
2287 struct btrfs_path *path; 2299 struct btrfs_path *path;
2288 struct extent_buffer *leaf; 2300 struct extent_buffer *leaf;
@@ -2292,10 +2304,13 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2292 int ret = 0, nr_unlink = 0, nr_truncate = 0; 2304 int ret = 0, nr_unlink = 0, nr_truncate = 0;
2293 2305
2294 if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) 2306 if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
2295 return; 2307 return 0;
2296 2308
2297 path = btrfs_alloc_path(); 2309 path = btrfs_alloc_path();
2298 BUG_ON(!path); 2310 if (!path) {
2311 ret = -ENOMEM;
2312 goto out;
2313 }
2299 path->reada = -1; 2314 path->reada = -1;
2300 2315
2301 key.objectid = BTRFS_ORPHAN_OBJECTID; 2316 key.objectid = BTRFS_ORPHAN_OBJECTID;
@@ -2304,18 +2319,16 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2304 2319
2305 while (1) { 2320 while (1) {
2306 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2321 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2307 if (ret < 0) { 2322 if (ret < 0)
2308 printk(KERN_ERR "Error searching slot for orphan: %d" 2323 goto out;
2309 "\n", ret);
2310 break;
2311 }
2312 2324
2313 /* 2325 /*
2314 * if ret == 0 means we found what we were searching for, which 2326 * if ret == 0 means we found what we were searching for, which
2315 * is weird, but possible, so only screw with path if we didnt 2327 * is weird, but possible, so only screw with path if we didn't
2316 * find the key and see if we have stuff that matches 2328 * find the key and see if we have stuff that matches
2317 */ 2329 */
2318 if (ret > 0) { 2330 if (ret > 0) {
2331 ret = 0;
2319 if (path->slots[0] == 0) 2332 if (path->slots[0] == 0)
2320 break; 2333 break;
2321 path->slots[0]--; 2334 path->slots[0]--;
@@ -2343,7 +2356,10 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2343 found_key.type = BTRFS_INODE_ITEM_KEY; 2356 found_key.type = BTRFS_INODE_ITEM_KEY;
2344 found_key.offset = 0; 2357 found_key.offset = 0;
2345 inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); 2358 inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
2346 BUG_ON(IS_ERR(inode)); 2359 if (IS_ERR(inode)) {
2360 ret = PTR_ERR(inode);
2361 goto out;
2362 }
2347 2363
2348 /* 2364 /*
2349 * add this inode to the orphan list so btrfs_orphan_del does 2365 * add this inode to the orphan list so btrfs_orphan_del does
@@ -2361,7 +2377,10 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2361 */ 2377 */
2362 if (is_bad_inode(inode)) { 2378 if (is_bad_inode(inode)) {
2363 trans = btrfs_start_transaction(root, 0); 2379 trans = btrfs_start_transaction(root, 0);
2364 BUG_ON(IS_ERR(trans)); 2380 if (IS_ERR(trans)) {
2381 ret = PTR_ERR(trans);
2382 goto out;
2383 }
2365 btrfs_orphan_del(trans, inode); 2384 btrfs_orphan_del(trans, inode);
2366 btrfs_end_transaction(trans, root); 2385 btrfs_end_transaction(trans, root);
2367 iput(inode); 2386 iput(inode);
@@ -2370,17 +2389,22 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2370 2389
2371 /* if we have links, this was a truncate, lets do that */ 2390 /* if we have links, this was a truncate, lets do that */
2372 if (inode->i_nlink) { 2391 if (inode->i_nlink) {
2392 if (!S_ISREG(inode->i_mode)) {
2393 WARN_ON(1);
2394 iput(inode);
2395 continue;
2396 }
2373 nr_truncate++; 2397 nr_truncate++;
2374 btrfs_truncate(inode); 2398 ret = btrfs_truncate(inode);
2375 } else { 2399 } else {
2376 nr_unlink++; 2400 nr_unlink++;
2377 } 2401 }
2378 2402
2379 /* this will do delete_inode and everything for us */ 2403 /* this will do delete_inode and everything for us */
2380 iput(inode); 2404 iput(inode);
2405 if (ret)
2406 goto out;
2381 } 2407 }
2382 btrfs_free_path(path);
2383
2384 root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE; 2408 root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;
2385 2409
2386 if (root->orphan_block_rsv) 2410 if (root->orphan_block_rsv)
@@ -2389,14 +2413,20 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2389 2413
2390 if (root->orphan_block_rsv || root->orphan_item_inserted) { 2414 if (root->orphan_block_rsv || root->orphan_item_inserted) {
2391 trans = btrfs_join_transaction(root, 1); 2415 trans = btrfs_join_transaction(root, 1);
2392 BUG_ON(IS_ERR(trans)); 2416 if (!IS_ERR(trans))
2393 btrfs_end_transaction(trans, root); 2417 btrfs_end_transaction(trans, root);
2394 } 2418 }
2395 2419
2396 if (nr_unlink) 2420 if (nr_unlink)
2397 printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); 2421 printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink);
2398 if (nr_truncate) 2422 if (nr_truncate)
2399 printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); 2423 printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate);
2424
2425out:
2426 if (ret)
2427 printk(KERN_CRIT "btrfs: could not do orphan cleanup %d\n", ret);
2428 btrfs_free_path(path);
2429 return ret;
2400} 2430}
2401 2431
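Because btrfs_orphan_cleanup() now returns an error code, its callers can fail gracefully instead of assuming cleanup always succeeded; a minimal caller sketch:

	ret = btrfs_orphan_cleanup(root);
	if (ret)
		return ret;   /* abort instead of continuing half-cleaned */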
2402/* 2432/*
@@ -2563,6 +2593,13 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
2563 struct btrfs_inode_item *item, 2593 struct btrfs_inode_item *item,
2564 struct inode *inode) 2594 struct inode *inode)
2565{ 2595{
2596 if (!leaf->map_token)
2597 map_private_extent_buffer(leaf, (unsigned long)item,
2598 sizeof(struct btrfs_inode_item),
2599 &leaf->map_token, &leaf->kaddr,
2600 &leaf->map_start, &leaf->map_len,
2601 KM_USER1);
2602
2566 btrfs_set_inode_uid(leaf, item, inode->i_uid); 2603 btrfs_set_inode_uid(leaf, item, inode->i_uid);
2567 btrfs_set_inode_gid(leaf, item, inode->i_gid); 2604 btrfs_set_inode_gid(leaf, item, inode->i_gid);
2568 btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); 2605 btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
@@ -2591,6 +2628,11 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
2591 btrfs_set_inode_rdev(leaf, item, inode->i_rdev); 2628 btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
2592 btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); 2629 btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
2593 btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group); 2630 btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group);
2631
2632 if (leaf->map_token) {
2633 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2634 leaf->map_token = NULL;
2635 }
2594} 2636}
2595 2637
2596/* 2638/*
@@ -2635,10 +2677,10 @@ failed:
2635 * recovery code. It removes a link in a directory with a given name, and 2677 * recovery code. It removes a link in a directory with a given name, and
2636 * also drops the back refs in the inode to the directory 2678 * also drops the back refs in the inode to the directory
2637 */ 2679 */
2638int btrfs_unlink_inode(struct btrfs_trans_handle *trans, 2680static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2639 struct btrfs_root *root, 2681 struct btrfs_root *root,
2640 struct inode *dir, struct inode *inode, 2682 struct inode *dir, struct inode *inode,
2641 const char *name, int name_len) 2683 const char *name, int name_len)
2642{ 2684{
2643 struct btrfs_path *path; 2685 struct btrfs_path *path;
2644 int ret = 0; 2686 int ret = 0;
@@ -2710,12 +2752,25 @@ err:
2710 btrfs_i_size_write(dir, dir->i_size - name_len * 2); 2752 btrfs_i_size_write(dir, dir->i_size - name_len * 2);
2711 inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; 2753 inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME;
2712 btrfs_update_inode(trans, root, dir); 2754 btrfs_update_inode(trans, root, dir);
2713 btrfs_drop_nlink(inode);
2714 ret = btrfs_update_inode(trans, root, inode);
2715out: 2755out:
2716 return ret; 2756 return ret;
2717} 2757}
2718 2758
2759int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2760 struct btrfs_root *root,
2761 struct inode *dir, struct inode *inode,
2762 const char *name, int name_len)
2763{
2764 int ret;
2765 ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
2766 if (!ret) {
2767 btrfs_drop_nlink(inode);
2768 ret = btrfs_update_inode(trans, root, inode);
2769 }
2770 return ret;
2771}
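
Splitting btrfs_unlink_inode() into a bare __btrfs_unlink_inode() plus a thin wrapper lets the rename path (further down in this diff) remove the directory entry without touching the link count. A hedged sketch of the same split with made-up types and names:

#include <stdio.h>

struct obj { int nlink; int dirty; };

/* Removes only the directory entry and back refs (the __ variant above). */
static int drop_entry(struct obj *dir, struct obj *o)
{
        (void)o;
        dir->dirty = 1;
        return 0;
}

/* The public wrapper owns the link-count change and inode update. */
static int do_unlink(struct obj *dir, struct obj *o)
{
        int ret = drop_entry(dir, o);

        if (!ret) {
                o->nlink--;
                o->dirty = 1;
        }
        return ret;
}

int main(void)
{
        struct obj dir = { 2, 0 }, file = { 1, 0 };

        do_unlink(&dir, &file);
        printf("nlink=%d\n", file.nlink); /* 0 */
        return 0;
}
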
2772
2773
2719/* helper to check if there is any shared block in the path */ 2774/* helper to check if there is any shared block in the path */
2720static int check_path_shared(struct btrfs_root *root, 2775static int check_path_shared(struct btrfs_root *root,
2721 struct btrfs_path *path) 2776 struct btrfs_path *path)
@@ -3537,7 +3592,13 @@ out:
3537 return ret; 3592 return ret;
3538} 3593}
3539 3594
3540int btrfs_cont_expand(struct inode *inode, loff_t size) 3595/*
3596 * This function puts in dummy file extents for the area we're creating a hole
3597 * for. So if we are truncating this file to a larger size, we need to insert
3598 * these file extents so that btrfs_get_extent will return an EXTENT_MAP_HOLE for
3599 * the range between oldsize and size
3600 */
3601int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3541{ 3602{
3542 struct btrfs_trans_handle *trans; 3603 struct btrfs_trans_handle *trans;
3543 struct btrfs_root *root = BTRFS_I(inode)->root; 3604 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -3545,7 +3606,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3545 struct extent_map *em = NULL; 3606 struct extent_map *em = NULL;
3546 struct extent_state *cached_state = NULL; 3607 struct extent_state *cached_state = NULL;
3547 u64 mask = root->sectorsize - 1; 3608 u64 mask = root->sectorsize - 1;
3548 u64 hole_start = (inode->i_size + mask) & ~mask; 3609 u64 hole_start = (oldsize + mask) & ~mask;
3549 u64 block_end = (size + mask) & ~mask; 3610 u64 block_end = (size + mask) & ~mask;
3550 u64 last_byte; 3611 u64 last_byte;
3551 u64 cur_offset; 3612 u64 cur_offset;
@@ -3590,13 +3651,15 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3590 err = btrfs_drop_extents(trans, inode, cur_offset, 3651 err = btrfs_drop_extents(trans, inode, cur_offset,
3591 cur_offset + hole_size, 3652 cur_offset + hole_size,
3592 &hint_byte, 1); 3653 &hint_byte, 1);
3593 BUG_ON(err); 3654 if (err)
3655 break;
3594 3656
3595 err = btrfs_insert_file_extent(trans, root, 3657 err = btrfs_insert_file_extent(trans, root,
3596 inode->i_ino, cur_offset, 0, 3658 inode->i_ino, cur_offset, 0,
3597 0, hole_size, 0, hole_size, 3659 0, hole_size, 0, hole_size,
3598 0, 0, 0); 3660 0, 0, 0);
3599 BUG_ON(err); 3661 if (err)
3662 break;
3600 3663
3601 btrfs_drop_extent_cache(inode, hole_start, 3664 btrfs_drop_extent_cache(inode, hole_start,
3602 last_byte - 1, 0); 3665 last_byte - 1, 0);
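
The hole range in btrfs_cont_expand() is computed with the usual power-of-two rounding: hole_start rounds oldsize up to a sector boundary, block_end rounds the new size up. A small stand-alone demo of that arithmetic, assuming 4096-byte sectors:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t sectorsize = 4096;     /* assumed sector size */
        uint64_t mask = sectorsize - 1;
        uint64_t oldsize = 5000, newsize = 20000;

        uint64_t hole_start = (oldsize + mask) & ~mask; /* round up: 8192 */
        uint64_t block_end  = (newsize + mask) & ~mask; /* round up: 20480 */

        printf("hole [%llu, %llu)\n",
               (unsigned long long)hole_start,
               (unsigned long long)block_end);
        return 0;
}
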
@@ -3616,81 +3679,41 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3616 return err; 3679 return err;
3617} 3680}
3618 3681
3619static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) 3682static int btrfs_setsize(struct inode *inode, loff_t newsize)
3620{ 3683{
3621 struct btrfs_root *root = BTRFS_I(inode)->root; 3684 loff_t oldsize = i_size_read(inode);
3622 struct btrfs_trans_handle *trans;
3623 unsigned long nr;
3624 int ret; 3685 int ret;
3625 3686
3626 if (attr->ia_size == inode->i_size) 3687 if (newsize == oldsize)
3627 return 0; 3688 return 0;
3628 3689
3629 if (attr->ia_size > inode->i_size) { 3690 if (newsize > oldsize) {
3630 unsigned long limit; 3691 i_size_write(inode, newsize);
3631 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; 3692 btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
3632 if (attr->ia_size > inode->i_sb->s_maxbytes) 3693 truncate_pagecache(inode, oldsize, newsize);
3633 return -EFBIG; 3694 ret = btrfs_cont_expand(inode, oldsize, newsize);
3634 if (limit != RLIM_INFINITY && attr->ia_size > limit) {
3635 send_sig(SIGXFSZ, current, 0);
3636 return -EFBIG;
3637 }
3638 }
3639
3640 trans = btrfs_start_transaction(root, 5);
3641 if (IS_ERR(trans))
3642 return PTR_ERR(trans);
3643
3644 btrfs_set_trans_block_group(trans, inode);
3645
3646 ret = btrfs_orphan_add(trans, inode);
3647 BUG_ON(ret);
3648
3649 nr = trans->blocks_used;
3650 btrfs_end_transaction(trans, root);
3651 btrfs_btree_balance_dirty(root, nr);
3652
3653 if (attr->ia_size > inode->i_size) {
3654 ret = btrfs_cont_expand(inode, attr->ia_size);
3655 if (ret) { 3695 if (ret) {
3656 btrfs_truncate(inode); 3696 btrfs_setsize(inode, oldsize);
3657 return ret; 3697 return ret;
3658 } 3698 }
3659 3699
3660 i_size_write(inode, attr->ia_size); 3700 mark_inode_dirty(inode);
3661 btrfs_ordered_update_i_size(inode, inode->i_size, NULL); 3701 } else {
3662 3702
3663 trans = btrfs_start_transaction(root, 0); 3703 /*
3664 BUG_ON(IS_ERR(trans)); 3704 * We're truncating a file that used to have good data down to
3665 btrfs_set_trans_block_group(trans, inode); 3705 * zero. Make sure it gets into the ordered flush list so that
3666 trans->block_rsv = root->orphan_block_rsv; 3706 * any new writes get down to disk quickly.
3667 BUG_ON(!trans->block_rsv); 3707 */
3708 if (newsize == 0)
3709 BTRFS_I(inode)->ordered_data_close = 1;
3668 3710
3669 ret = btrfs_update_inode(trans, root, inode); 3711 /* we don't support swapfiles, so vmtruncate shouldn't fail */
3670 BUG_ON(ret); 3712 truncate_setsize(inode, newsize);
3671 if (inode->i_nlink > 0) { 3713 ret = btrfs_truncate(inode);
3672 ret = btrfs_orphan_del(trans, inode);
3673 BUG_ON(ret);
3674 }
3675 nr = trans->blocks_used;
3676 btrfs_end_transaction(trans, root);
3677 btrfs_btree_balance_dirty(root, nr);
3678 return 0;
3679 } 3714 }
3680 3715
3681 /* 3716 return ret;
3682 * We're truncating a file that used to have good data down to
3683 * zero. Make sure it gets into the ordered flush list so that
3684 * any new writes get down to disk quickly.
3685 */
3686 if (attr->ia_size == 0)
3687 BTRFS_I(inode)->ordered_data_close = 1;
3688
3689 /* we don't support swapfiles, so vmtruncate shouldn't fail */
3690 ret = vmtruncate(inode, attr->ia_size);
3691 BUG_ON(ret);
3692
3693 return 0;
3694} 3717}
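
btrfs_setsize() now takes the two cases apart: growing writes i_size first, drops the page cache over the old tail, fills the gap with hole extents, and rolls back on failure; shrinking marks zero-length truncates for ordered flush and falls through to btrfs_truncate(). A hedged control-flow sketch; every helper here is a placeholder for the kernel call named in its comment:

#include <stdio.h>

struct file_state { long long size; int flush_ordered; int dirty; };

static void drop_tail_pages(struct file_state *f, long long from)
{
        (void)f; (void)from;               /* truncate_pagecache() */
}
static int fill_hole(struct file_state *f, long long a, long long b)
{
        (void)f; (void)a; (void)b;         /* btrfs_cont_expand() */
        return 0;
}
static int truncate_items(struct file_state *f)
{
        (void)f;                           /* btrfs_truncate() */
        return 0;
}

static int setsize(struct file_state *f, long long newsize)
{
        long long oldsize = f->size;
        int ret;

        if (newsize == oldsize)
                return 0;

        if (newsize > oldsize) {
                f->size = newsize;              /* i_size_write() */
                drop_tail_pages(f, oldsize);
                ret = fill_hole(f, oldsize, newsize);
                if (ret) {
                        setsize(f, oldsize);    /* roll back the grow */
                        return ret;
                }
                f->dirty = 1;                   /* mark_inode_dirty() */
        } else {
                if (newsize == 0)
                        f->flush_ordered = 1;   /* ordered_data_close */
                f->size = newsize;              /* truncate_setsize() */
                ret = truncate_items(f);
        }
        return ret;
}

int main(void)
{
        struct file_state f = { 4096, 0, 0 };
        return setsize(&f, 20000) ? 1 : 0;
}
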
3695 3718
3696static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) 3719static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
@@ -3707,7 +3730,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3707 return err; 3730 return err;
3708 3731
3709 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { 3732 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
3710 err = btrfs_setattr_size(inode, attr); 3733 err = btrfs_setsize(inode, attr->ia_size);
3711 if (err) 3734 if (err)
3712 return err; 3735 return err;
3713 } 3736 }
@@ -3730,6 +3753,8 @@ void btrfs_evict_inode(struct inode *inode)
3730 unsigned long nr; 3753 unsigned long nr;
3731 int ret; 3754 int ret;
3732 3755
3756 trace_btrfs_inode_evict(inode);
3757
3733 truncate_inode_pages(&inode->i_data, 0); 3758 truncate_inode_pages(&inode->i_data, 0);
3734 if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || 3759 if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
3735 root == root->fs_info->tree_root)) 3760 root == root->fs_info->tree_root))
@@ -4072,7 +4097,6 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
4072 BTRFS_I(inode)->root = root; 4097 BTRFS_I(inode)->root = root;
4073 memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); 4098 memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
4074 btrfs_read_locked_inode(inode); 4099 btrfs_read_locked_inode(inode);
4075
4076 inode_tree_add(inode); 4100 inode_tree_add(inode);
4077 unlock_new_inode(inode); 4101 unlock_new_inode(inode);
4078 if (new) 4102 if (new)
@@ -4147,8 +4171,10 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
4147 if (!IS_ERR(inode) && root != sub_root) { 4171 if (!IS_ERR(inode) && root != sub_root) {
4148 down_read(&root->fs_info->cleanup_work_sem); 4172 down_read(&root->fs_info->cleanup_work_sem);
4149 if (!(inode->i_sb->s_flags & MS_RDONLY)) 4173 if (!(inode->i_sb->s_flags & MS_RDONLY))
4150 btrfs_orphan_cleanup(sub_root); 4174 ret = btrfs_orphan_cleanup(sub_root);
4151 up_read(&root->fs_info->cleanup_work_sem); 4175 up_read(&root->fs_info->cleanup_work_sem);
4176 if (ret)
4177 inode = ERR_PTR(ret);
4152 } 4178 }
4153 4179
4154 return inode; 4180 return inode;
@@ -4196,10 +4222,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4196 struct btrfs_key found_key; 4222 struct btrfs_key found_key;
4197 struct btrfs_path *path; 4223 struct btrfs_path *path;
4198 int ret; 4224 int ret;
4199 u32 nritems;
4200 struct extent_buffer *leaf; 4225 struct extent_buffer *leaf;
4201 int slot; 4226 int slot;
4202 int advance;
4203 unsigned char d_type; 4227 unsigned char d_type;
4204 int over = 0; 4228 int over = 0;
4205 u32 di_cur; 4229 u32 di_cur;
@@ -4242,27 +4266,19 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4242 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4266 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4243 if (ret < 0) 4267 if (ret < 0)
4244 goto err; 4268 goto err;
4245 advance = 0;
4246 4269
4247 while (1) { 4270 while (1) {
4248 leaf = path->nodes[0]; 4271 leaf = path->nodes[0];
4249 nritems = btrfs_header_nritems(leaf);
4250 slot = path->slots[0]; 4272 slot = path->slots[0];
4251 if (advance || slot >= nritems) { 4273 if (slot >= btrfs_header_nritems(leaf)) {
4252 if (slot >= nritems - 1) { 4274 ret = btrfs_next_leaf(root, path);
4253 ret = btrfs_next_leaf(root, path); 4275 if (ret < 0)
4254 if (ret) 4276 goto err;
4255 break; 4277 else if (ret > 0)
4256 leaf = path->nodes[0]; 4278 break;
4257 nritems = btrfs_header_nritems(leaf); 4279 continue;
4258 slot = path->slots[0];
4259 } else {
4260 slot++;
4261 path->slots[0]++;
4262 }
4263 } 4280 }
4264 4281
4265 advance = 1;
4266 item = btrfs_item_nr(leaf, slot); 4282 item = btrfs_item_nr(leaf, slot);
4267 btrfs_item_key_to_cpu(leaf, &found_key, slot); 4283 btrfs_item_key_to_cpu(leaf, &found_key, slot);
4268 4284
@@ -4271,7 +4287,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4271 if (btrfs_key_type(&found_key) != key_type) 4287 if (btrfs_key_type(&found_key) != key_type)
4272 break; 4288 break;
4273 if (found_key.offset < filp->f_pos) 4289 if (found_key.offset < filp->f_pos)
4274 continue; 4290 goto next;
4275 4291
4276 filp->f_pos = found_key.offset; 4292 filp->f_pos = found_key.offset;
4277 4293
@@ -4282,6 +4298,9 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4282 while (di_cur < di_total) { 4298 while (di_cur < di_total) {
4283 struct btrfs_key location; 4299 struct btrfs_key location;
4284 4300
4301 if (verify_dir_item(root, leaf, di))
4302 break;
4303
4285 name_len = btrfs_dir_name_len(leaf, di); 4304 name_len = btrfs_dir_name_len(leaf, di);
4286 if (name_len <= sizeof(tmp_name)) { 4305 if (name_len <= sizeof(tmp_name)) {
4287 name_ptr = tmp_name; 4306 name_ptr = tmp_name;
@@ -4321,6 +4340,8 @@ skip:
4321 di_cur += di_len; 4340 di_cur += di_len;
4322 di = (struct btrfs_dir_item *)((char *)di + di_len); 4341 di = (struct btrfs_dir_item *)((char *)di + di_len);
4323 } 4342 }
4343next:
4344 path->slots[0]++;
4324 } 4345 }
4325 4346
4326 /* Reached end of directory/root. Bump pos past the last item. */ 4347 /* Reached end of directory/root. Bump pos past the last item. */
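
The readdir loop now keeps a single cursor: when the slot runs past the current leaf it calls btrfs_next_leaf() and re-checks, and the skip case jumps to a shared next: label that does the one slot advance. The same shape, reduced to a runnable paged cursor over arrays; all names are hypothetical:

#include <stdio.h>

#define SLOTS_PER_PAGE 4
static int pages[3][SLOTS_PER_PAGE] = {
        {1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}
};

struct cursor { int page; int slot; };

/* Returns 0 on success, >0 at end of data (like btrfs_next_leaf()). */
static int next_page(struct cursor *c)
{
        if (c->page + 1 >= 3)
                return 1;
        c->page++;
        c->slot = 0;
        return 0;
}

int main(void)
{
        struct cursor c = { 0, 0 };

        while (1) {
                if (c.slot >= SLOTS_PER_PAGE) {
                        if (next_page(&c) > 0)
                                break;   /* end of directory */
                        continue;        /* re-check the fresh page */
                }
                if (pages[c.page][c.slot] % 2 == 0)
                        goto next;       /* skip, but still advance */
                printf("%d\n", pages[c.page][c.slot]);
next:
                c.slot++;                /* single advance point */
        }
        return 0;
}
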
@@ -4513,12 +4534,17 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4513 BUG_ON(!path); 4534 BUG_ON(!path);
4514 4535
4515 inode = new_inode(root->fs_info->sb); 4536 inode = new_inode(root->fs_info->sb);
4516 if (!inode) 4537 if (!inode) {
4538 btrfs_free_path(path);
4517 return ERR_PTR(-ENOMEM); 4539 return ERR_PTR(-ENOMEM);
4540 }
4518 4541
4519 if (dir) { 4542 if (dir) {
4543 trace_btrfs_inode_request(dir);
4544
4520 ret = btrfs_set_inode_index(dir, index); 4545 ret = btrfs_set_inode_index(dir, index);
4521 if (ret) { 4546 if (ret) {
4547 btrfs_free_path(path);
4522 iput(inode); 4548 iput(inode);
4523 return ERR_PTR(ret); 4549 return ERR_PTR(ret);
4524 } 4550 }
@@ -4585,12 +4611,16 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4585 if ((mode & S_IFREG)) { 4611 if ((mode & S_IFREG)) {
4586 if (btrfs_test_opt(root, NODATASUM)) 4612 if (btrfs_test_opt(root, NODATASUM))
4587 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; 4613 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
4588 if (btrfs_test_opt(root, NODATACOW)) 4614 if (btrfs_test_opt(root, NODATACOW) ||
4615 (BTRFS_I(dir)->flags & BTRFS_INODE_NODATACOW))
4589 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; 4616 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
4590 } 4617 }
4591 4618
4592 insert_inode_hash(inode); 4619 insert_inode_hash(inode);
4593 inode_tree_add(inode); 4620 inode_tree_add(inode);
4621
4622 trace_btrfs_inode_new(inode);
4623
4594 return inode; 4624 return inode;
4595fail: 4625fail:
4596 if (dir) 4626 if (dir)
@@ -4809,10 +4839,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4809 4839
4810 /* do not allow sys_link's with other subvols of the same device */ 4840 /* do not allow sys_link's with other subvols of the same device */
4811 if (root->objectid != BTRFS_I(inode)->root->objectid) 4841 if (root->objectid != BTRFS_I(inode)->root->objectid)
4812 return -EPERM; 4842 return -EXDEV;
4813 4843
4814 btrfs_inc_nlink(inode); 4844 if (inode->i_nlink == ~0U)
4815 inode->i_ctime = CURRENT_TIME; 4845 return -EMLINK;
4816 4846
4817 err = btrfs_set_inode_index(dir, &index); 4847 err = btrfs_set_inode_index(dir, &index);
4818 if (err) 4848 if (err)
@@ -4829,6 +4859,9 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4829 goto fail; 4859 goto fail;
4830 } 4860 }
4831 4861
4862 btrfs_inc_nlink(inode);
4863 inode->i_ctime = CURRENT_TIME;
4864
4832 btrfs_set_trans_block_group(trans, dir); 4865 btrfs_set_trans_block_group(trans, dir);
4833 ihold(inode); 4866 ihold(inode);
4834 4867
@@ -5198,7 +5231,7 @@ again:
5198 btrfs_mark_buffer_dirty(leaf); 5231 btrfs_mark_buffer_dirty(leaf);
5199 } 5232 }
5200 set_extent_uptodate(io_tree, em->start, 5233 set_extent_uptodate(io_tree, em->start,
5201 extent_map_end(em) - 1, GFP_NOFS); 5234 extent_map_end(em) - 1, NULL, GFP_NOFS);
5202 goto insert; 5235 goto insert;
5203 } else { 5236 } else {
5204 printk(KERN_ERR "btrfs unknown found_type %d\n", found_type); 5237 printk(KERN_ERR "btrfs unknown found_type %d\n", found_type);
@@ -5265,6 +5298,9 @@ insert:
5265 } 5298 }
5266 write_unlock(&em_tree->lock); 5299 write_unlock(&em_tree->lock);
5267out: 5300out:
5301
5302 trace_btrfs_get_extent(root, em);
5303
5268 if (path) 5304 if (path)
5269 btrfs_free_path(path); 5305 btrfs_free_path(path);
5270 if (trans) { 5306 if (trans) {
@@ -5402,17 +5438,30 @@ out:
5402} 5438}
5403 5439
5404static struct extent_map *btrfs_new_extent_direct(struct inode *inode, 5440static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5441 struct extent_map *em,
5405 u64 start, u64 len) 5442 u64 start, u64 len)
5406{ 5443{
5407 struct btrfs_root *root = BTRFS_I(inode)->root; 5444 struct btrfs_root *root = BTRFS_I(inode)->root;
5408 struct btrfs_trans_handle *trans; 5445 struct btrfs_trans_handle *trans;
5409 struct extent_map *em;
5410 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 5446 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
5411 struct btrfs_key ins; 5447 struct btrfs_key ins;
5412 u64 alloc_hint; 5448 u64 alloc_hint;
5413 int ret; 5449 int ret;
5450 bool insert = false;
5414 5451
5415 btrfs_drop_extent_cache(inode, start, start + len - 1, 0); 5452 /*
5453 * OK: if the extent map we looked up is a hole and is for the exact
5454 * range we want, there is no reason to allocate a new one. However, if
5455 * it is not right, we need to free this one and drop the cache for
5456 * our range.
5457 */
5458 if (em->block_start != EXTENT_MAP_HOLE || em->start != start ||
5459 em->len != len) {
5460 free_extent_map(em);
5461 em = NULL;
5462 insert = true;
5463 btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
5464 }
5416 5465
5417 trans = btrfs_join_transaction(root, 0); 5466 trans = btrfs_join_transaction(root, 0);
5418 if (IS_ERR(trans)) 5467 if (IS_ERR(trans))
@@ -5428,10 +5477,12 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5428 goto out; 5477 goto out;
5429 } 5478 }
5430 5479
5431 em = alloc_extent_map(GFP_NOFS);
5432 if (!em) { 5480 if (!em) {
5433 em = ERR_PTR(-ENOMEM); 5481 em = alloc_extent_map(GFP_NOFS);
5434 goto out; 5482 if (!em) {
5483 em = ERR_PTR(-ENOMEM);
5484 goto out;
5485 }
5435 } 5486 }
5436 5487
5437 em->start = start; 5488 em->start = start;
@@ -5441,9 +5492,15 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5441 em->block_start = ins.objectid; 5492 em->block_start = ins.objectid;
5442 em->block_len = ins.offset; 5493 em->block_len = ins.offset;
5443 em->bdev = root->fs_info->fs_devices->latest_bdev; 5494 em->bdev = root->fs_info->fs_devices->latest_bdev;
5495
5496 /*
5497 * We need to do this because if we're using the original em we searched
5498 * for, we could have EXTENT_FLAG_VACANCY set, and we don't want that.
5499 */
5500 em->flags = 0;
5444 set_bit(EXTENT_FLAG_PINNED, &em->flags); 5501 set_bit(EXTENT_FLAG_PINNED, &em->flags);
5445 5502
5446 while (1) { 5503 while (insert) {
5447 write_lock(&em_tree->lock); 5504 write_lock(&em_tree->lock);
5448 ret = add_extent_mapping(em_tree, em); 5505 ret = add_extent_mapping(em_tree, em);
5449 write_unlock(&em_tree->lock); 5506 write_unlock(&em_tree->lock);
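
btrfs_new_extent_direct() now reuses the extent map it was handed when it already describes exactly the hole being filled, and only re-inserts into the tree when it allocated a fresh one; either way em->flags is cleared first so a stale EXTENT_FLAG_VACANCY from the lookup cannot leak through. A generic sketch of that reuse-or-replace decision (types and names invented for illustration):

#include <stdlib.h>

struct mapping { unsigned long start, len, flags; int is_hole; };

/* Reuse 'm' if it is a hole covering exactly [start, start+len); otherwise
 * free it and allocate a fresh one. '*insert' tells the caller whether the
 * result still has to be added to the lookup structure. */
static struct mapping *reuse_or_alloc(struct mapping *m, unsigned long start,
                                      unsigned long len, int *insert)
{
        *insert = 0;
        if (!m->is_hole || m->start != start || m->len != len) {
                free(m);
                m = calloc(1, sizeof(*m));
                if (!m)
                        return NULL;
                *insert = 1;
        }
        m->start = start;
        m->len = len;
        m->flags = 0;   /* wipe stale lookup-time flags before reuse */
        return m;
}

int main(void)
{
        int insert;
        struct mapping *m = calloc(1, sizeof(*m));

        if (!m)
                return 1;
        m->is_hole = 1;
        m = reuse_or_alloc(m, 0, 4096, &insert);
        free(m);
        return 0;
}
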
@@ -5661,8 +5718,7 @@ must_cow:
5661 * it above 5718 * it above
5662 */ 5719 */
5663 len = bh_result->b_size; 5720 len = bh_result->b_size;
5664 free_extent_map(em); 5721 em = btrfs_new_extent_direct(inode, em, start, len);
5665 em = btrfs_new_extent_direct(inode, start, len);
5666 if (IS_ERR(em)) 5722 if (IS_ERR(em))
5667 return PTR_ERR(em); 5723 return PTR_ERR(em);
5668 len = min(len, em->len - (start - em->start)); 5724 len = min(len, em->len - (start - em->start));
@@ -5748,6 +5804,10 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
5748 5804
5749 kfree(dip->csums); 5805 kfree(dip->csums);
5750 kfree(dip); 5806 kfree(dip);
5807
5808 /* If we had a csum failure make sure to clear the uptodate flag */
5809 if (err)
5810 clear_bit(BIO_UPTODATE, &bio->bi_flags);
5751 dio_end_io(bio, err); 5811 dio_end_io(bio, err);
5752} 5812}
5753 5813
@@ -5821,8 +5881,10 @@ again:
5821 } 5881 }
5822 5882
5823 add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); 5883 add_pending_csums(trans, inode, ordered->file_offset, &ordered->list);
5824 btrfs_ordered_update_i_size(inode, 0, ordered); 5884 ret = btrfs_ordered_update_i_size(inode, 0, ordered);
5825 btrfs_update_inode(trans, root, inode); 5885 if (!ret)
5886 btrfs_update_inode(trans, root, inode);
5887 ret = 0;
5826out_unlock: 5888out_unlock:
5827 unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, 5889 unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset,
5828 ordered->file_offset + ordered->len - 1, 5890 ordered->file_offset + ordered->len - 1,
@@ -5849,6 +5911,10 @@ out_done:
5849 5911
5850 kfree(dip->csums); 5912 kfree(dip->csums);
5851 kfree(dip); 5913 kfree(dip);
5914
5915 /* If we had an error make sure to clear the uptodate flag */
5916 if (err)
5917 clear_bit(BIO_UPTODATE, &bio->bi_flags);
5852 dio_end_io(bio, err); 5918 dio_end_io(bio, err);
5853} 5919}
5854 5920
@@ -5904,7 +5970,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
5904 5970
5905static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, 5971static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
5906 int rw, u64 file_offset, int skip_sum, 5972 int rw, u64 file_offset, int skip_sum,
5907 u32 *csums) 5973 u32 *csums, int async_submit)
5908{ 5974{
5909 int write = rw & REQ_WRITE; 5975 int write = rw & REQ_WRITE;
5910 struct btrfs_root *root = BTRFS_I(inode)->root; 5976 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -5915,18 +5981,33 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
5915 if (ret) 5981 if (ret)
5916 goto err; 5982 goto err;
5917 5983
5918 if (write && !skip_sum) { 5984 if (skip_sum)
5985 goto map;
5986
5987 if (write && async_submit) {
5919 ret = btrfs_wq_submit_bio(root->fs_info, 5988 ret = btrfs_wq_submit_bio(root->fs_info,
5920 inode, rw, bio, 0, 0, 5989 inode, rw, bio, 0, 0,
5921 file_offset, 5990 file_offset,
5922 __btrfs_submit_bio_start_direct_io, 5991 __btrfs_submit_bio_start_direct_io,
5923 __btrfs_submit_bio_done); 5992 __btrfs_submit_bio_done);
5924 goto err; 5993 goto err;
5925 } else if (!skip_sum) 5994 } else if (write) {
5926 btrfs_lookup_bio_sums_dio(root, inode, bio, 5995 /*
5996 * If we aren't doing async submit, calculate the csum of the
5997 * bio now.
5998 */
5999 ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1);
6000 if (ret)
6001 goto err;
6002 } else if (!skip_sum) {
6003 ret = btrfs_lookup_bio_sums_dio(root, inode, bio,
5927 file_offset, csums); 6004 file_offset, csums);
6005 if (ret)
6006 goto err;
6007 }
5928 6008
5929 ret = btrfs_map_bio(root, rw, bio, 0, 1); 6009map:
6010 ret = btrfs_map_bio(root, rw, bio, 0, async_submit);
5930err: 6011err:
5931 bio_put(bio); 6012 bio_put(bio);
5932 return ret; 6013 return ret;
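
__btrfs_submit_dio_bio() turns into a small dispatcher: checksums are skipped entirely, queued to a worker (async write), computed inline (sync write), or looked up from disk (read), and all synchronous paths converge on a single map/submit step via the map: label. The shape, hedged, as a plain C skeleton with placeholder helpers:

#include <stdio.h>

enum { OP_READ, OP_WRITE };

static int queue_async_csum(void)     { return 0; }
static int csum_now(void)             { return 0; }
static int lookup_stored_csums(void)  { return 0; }
static int map_and_submit(int async)  { printf("submit async=%d\n", async); return 0; }

static int submit_dio(int op, int skip_sum, int async_submit)
{
        int ret;

        if (skip_sum)
                goto map;                       /* nothing to checksum */

        if (op == OP_WRITE && async_submit)
                return queue_async_csum();      /* worker does csum + submit */

        if (op == OP_WRITE)
                ret = csum_now();               /* synchronous write csum */
        else
                ret = lookup_stored_csums();    /* read: fetch on-disk csums */
        if (ret)
                return ret;
map:
        return map_and_submit(async_submit);
}

int main(void)
{
        return submit_dio(OP_WRITE, 0, 0);
}
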
@@ -5948,13 +6029,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
5948 int nr_pages = 0; 6029 int nr_pages = 0;
5949 u32 *csums = dip->csums; 6030 u32 *csums = dip->csums;
5950 int ret = 0; 6031 int ret = 0;
5951 6032 int async_submit = 0;
5952 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); 6033 int write = rw & REQ_WRITE;
5953 if (!bio)
5954 return -ENOMEM;
5955 bio->bi_private = dip;
5956 bio->bi_end_io = btrfs_end_dio_bio;
5957 atomic_inc(&dip->pending_bios);
5958 6034
5959 map_length = orig_bio->bi_size; 6035 map_length = orig_bio->bi_size;
5960 ret = btrfs_map_block(map_tree, READ, start_sector << 9, 6036 ret = btrfs_map_block(map_tree, READ, start_sector << 9,
@@ -5964,6 +6040,19 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
5964 return -EIO; 6040 return -EIO;
5965 } 6041 }
5966 6042
6043 if (map_length >= orig_bio->bi_size) {
6044 bio = orig_bio;
6045 goto submit;
6046 }
6047
6048 async_submit = 1;
6049 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
6050 if (!bio)
6051 return -ENOMEM;
6052 bio->bi_private = dip;
6053 bio->bi_end_io = btrfs_end_dio_bio;
6054 atomic_inc(&dip->pending_bios);
6055
5967 while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { 6056 while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
5968 if (unlikely(map_length < submit_len + bvec->bv_len || 6057 if (unlikely(map_length < submit_len + bvec->bv_len ||
5969 bio_add_page(bio, bvec->bv_page, bvec->bv_len, 6058 bio_add_page(bio, bvec->bv_page, bvec->bv_len,
@@ -5977,14 +6066,15 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
5977 atomic_inc(&dip->pending_bios); 6066 atomic_inc(&dip->pending_bios);
5978 ret = __btrfs_submit_dio_bio(bio, inode, rw, 6067 ret = __btrfs_submit_dio_bio(bio, inode, rw,
5979 file_offset, skip_sum, 6068 file_offset, skip_sum,
5980 csums); 6069 csums, async_submit);
5981 if (ret) { 6070 if (ret) {
5982 bio_put(bio); 6071 bio_put(bio);
5983 atomic_dec(&dip->pending_bios); 6072 atomic_dec(&dip->pending_bios);
5984 goto out_err; 6073 goto out_err;
5985 } 6074 }
5986 6075
5987 if (!skip_sum) 6076 /* Writes use the ordered csums */
6077 if (!write && !skip_sum)
5988 csums = csums + nr_pages; 6078 csums = csums + nr_pages;
5989 start_sector += submit_len >> 9; 6079 start_sector += submit_len >> 9;
5990 file_offset += submit_len; 6080 file_offset += submit_len;
@@ -6013,8 +6103,9 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6013 } 6103 }
6014 } 6104 }
6015 6105
6106submit:
6016 ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, 6107 ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
6017 csums); 6108 csums, async_submit);
6018 if (!ret) 6109 if (!ret)
6019 return 0; 6110 return 0;
6020 6111
@@ -6052,7 +6143,8 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
6052 } 6143 }
6053 dip->csums = NULL; 6144 dip->csums = NULL;
6054 6145
6055 if (!skip_sum) { 6146 /* Writes use the ordered csum stuff, so we don't need dip->csums */
6147 if (!write && !skip_sum) {
6056 dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); 6148 dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
6057 if (!dip->csums) { 6149 if (!dip->csums) {
6058 kfree(dip); 6150 kfree(dip);
@@ -6108,6 +6200,7 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
6108 unsigned long nr_segs) 6200 unsigned long nr_segs)
6109{ 6201{
6110 int seg; 6202 int seg;
6203 int i;
6111 size_t size; 6204 size_t size;
6112 unsigned long addr; 6205 unsigned long addr;
6113 unsigned blocksize_mask = root->sectorsize - 1; 6206 unsigned blocksize_mask = root->sectorsize - 1;
@@ -6122,8 +6215,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
6122 addr = (unsigned long)iov[seg].iov_base; 6215 addr = (unsigned long)iov[seg].iov_base;
6123 size = iov[seg].iov_len; 6216 size = iov[seg].iov_len;
6124 end += size; 6217 end += size;
6125 if ((addr & blocksize_mask) || (size & blocksize_mask)) 6218 if ((addr & blocksize_mask) || (size & blocksize_mask))
6126 goto out; 6219 goto out;
6220
6221 /* If this is a write we don't need to check anymore */
6222 if (rw & WRITE)
6223 continue;
6224
6225 /*
6226 * Check to make sure we don't have duplicate iov_bases in this
6227 * iovec; if so return -EINVAL, otherwise we'll get csum errors
6228 * when reading back.
6229 */
6230 for (i = seg + 1; i < nr_segs; i++) {
6231 if (iov[seg].iov_base == iov[i].iov_base)
6232 goto out;
6233 }
6127 } 6234 }
6128 retval = 0; 6235 retval = 0;
6129out: 6236out:
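
The new read-side check in check_direct_IO() is a straightforward O(n^2) scan for repeated buffer addresses; writes continue early since duplicates only corrupt checksums on the read path. A runnable user-space equivalent over struct iovec:

#include <stdio.h>
#include <sys/uio.h>

/* Returns 1 if any two iovec entries share an iov_base (reads only). */
static int has_dup_iov_base(const struct iovec *iov, unsigned long nr_segs,
                            int is_write)
{
        unsigned long seg, i;

        if (is_write)
                return 0;               /* writes don't need the check */

        for (seg = 0; seg < nr_segs; seg++)
                for (i = seg + 1; i < nr_segs; i++)
                        if (iov[seg].iov_base == iov[i].iov_base)
                                return 1;
        return 0;
}

int main(void)
{
        char buf[8];
        struct iovec iov[2] = {
                { .iov_base = buf, .iov_len = 4 },
                { .iov_base = buf, .iov_len = 4 },  /* duplicate on purpose */
        };

        printf("%s\n", has_dup_iov_base(iov, 2, 0) ? "dup" : "ok");
        return 0;
}
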
@@ -6474,28 +6581,42 @@ out:
6474 return ret; 6581 return ret;
6475} 6582}
6476 6583
6477static void btrfs_truncate(struct inode *inode) 6584static int btrfs_truncate(struct inode *inode)
6478{ 6585{
6479 struct btrfs_root *root = BTRFS_I(inode)->root; 6586 struct btrfs_root *root = BTRFS_I(inode)->root;
6480 int ret; 6587 int ret;
6588 int err = 0;
6481 struct btrfs_trans_handle *trans; 6589 struct btrfs_trans_handle *trans;
6482 unsigned long nr; 6590 unsigned long nr;
6483 u64 mask = root->sectorsize - 1; 6591 u64 mask = root->sectorsize - 1;
6484 6592
6485 if (!S_ISREG(inode->i_mode)) {
6486 WARN_ON(1);
6487 return;
6488 }
6489
6490 ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); 6593 ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
6491 if (ret) 6594 if (ret)
6492 return; 6595 return ret;
6493 6596
6494 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); 6597 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
6495 btrfs_ordered_update_i_size(inode, inode->i_size, NULL); 6598 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
6496 6599
6600 trans = btrfs_start_transaction(root, 5);
6601 if (IS_ERR(trans))
6602 return PTR_ERR(trans);
6603
6604 btrfs_set_trans_block_group(trans, inode);
6605
6606 ret = btrfs_orphan_add(trans, inode);
6607 if (ret) {
6608 btrfs_end_transaction(trans, root);
6609 return ret;
6610 }
6611
6612 nr = trans->blocks_used;
6613 btrfs_end_transaction(trans, root);
6614 btrfs_btree_balance_dirty(root, nr);
6615
6616 /* Now start a transaction for the truncate */
6497 trans = btrfs_start_transaction(root, 0); 6617 trans = btrfs_start_transaction(root, 0);
6498 BUG_ON(IS_ERR(trans)); 6618 if (IS_ERR(trans))
6619 return PTR_ERR(trans);
6499 btrfs_set_trans_block_group(trans, inode); 6620 btrfs_set_trans_block_group(trans, inode);
6500 trans->block_rsv = root->orphan_block_rsv; 6621 trans->block_rsv = root->orphan_block_rsv;
6501 6622
@@ -6522,29 +6643,38 @@ static void btrfs_truncate(struct inode *inode)
6522 while (1) { 6643 while (1) {
6523 if (!trans) { 6644 if (!trans) {
6524 trans = btrfs_start_transaction(root, 0); 6645 trans = btrfs_start_transaction(root, 0);
6525 BUG_ON(IS_ERR(trans)); 6646 if (IS_ERR(trans))
6647 return PTR_ERR(trans);
6526 btrfs_set_trans_block_group(trans, inode); 6648 btrfs_set_trans_block_group(trans, inode);
6527 trans->block_rsv = root->orphan_block_rsv; 6649 trans->block_rsv = root->orphan_block_rsv;
6528 } 6650 }
6529 6651
6530 ret = btrfs_block_rsv_check(trans, root, 6652 ret = btrfs_block_rsv_check(trans, root,
6531 root->orphan_block_rsv, 0, 5); 6653 root->orphan_block_rsv, 0, 5);
6532 if (ret) { 6654 if (ret == -EAGAIN) {
6533 BUG_ON(ret != -EAGAIN);
6534 ret = btrfs_commit_transaction(trans, root); 6655 ret = btrfs_commit_transaction(trans, root);
6535 BUG_ON(ret); 6656 if (ret)
6657 return ret;
6536 trans = NULL; 6658 trans = NULL;
6537 continue; 6659 continue;
6660 } else if (ret) {
6661 err = ret;
6662 break;
6538 } 6663 }
6539 6664
6540 ret = btrfs_truncate_inode_items(trans, root, inode, 6665 ret = btrfs_truncate_inode_items(trans, root, inode,
6541 inode->i_size, 6666 inode->i_size,
6542 BTRFS_EXTENT_DATA_KEY); 6667 BTRFS_EXTENT_DATA_KEY);
6543 if (ret != -EAGAIN) 6668 if (ret != -EAGAIN) {
6669 err = ret;
6544 break; 6670 break;
6671 }
6545 6672
6546 ret = btrfs_update_inode(trans, root, inode); 6673 ret = btrfs_update_inode(trans, root, inode);
6547 BUG_ON(ret); 6674 if (ret) {
6675 err = ret;
6676 break;
6677 }
6548 6678
6549 nr = trans->blocks_used; 6679 nr = trans->blocks_used;
6550 btrfs_end_transaction(trans, root); 6680 btrfs_end_transaction(trans, root);
@@ -6554,16 +6684,27 @@ static void btrfs_truncate(struct inode *inode)
6554 6684
6555 if (ret == 0 && inode->i_nlink > 0) { 6685 if (ret == 0 && inode->i_nlink > 0) {
6556 ret = btrfs_orphan_del(trans, inode); 6686 ret = btrfs_orphan_del(trans, inode);
6557 BUG_ON(ret); 6687 if (ret)
6688 err = ret;
6689 } else if (ret && inode->i_nlink > 0) {
6690 /*
6691 * Failed to do the truncate, remove us from the in memory
6692 * orphan list.
6693 */
6694 ret = btrfs_orphan_del(NULL, inode);
6558 } 6695 }
6559 6696
6560 ret = btrfs_update_inode(trans, root, inode); 6697 ret = btrfs_update_inode(trans, root, inode);
6561 BUG_ON(ret); 6698 if (ret && !err)
6699 err = ret;
6562 6700
6563 nr = trans->blocks_used; 6701 nr = trans->blocks_used;
6564 ret = btrfs_end_transaction_throttle(trans, root); 6702 ret = btrfs_end_transaction_throttle(trans, root);
6565 BUG_ON(ret); 6703 if (ret && !err)
6704 err = ret;
6566 btrfs_btree_balance_dirty(root, nr); 6705 btrfs_btree_balance_dirty(root, nr);
6706
6707 return err;
6567} 6708}
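
btrfs_truncate() now keeps two variables: ret for the current step and err for the first failure, so the later cleanup steps still run but cannot mask the original error. The idiom in isolation:

#include <stdio.h>

static int step(int n) { return n == 1 ? -5 : 0; } /* step 1 fails */

int main(void)
{
        int ret, err = 0;
        int n;

        for (n = 0; n < 3; n++) {
                ret = step(n);          /* keep running remaining steps */
                if (ret && !err)
                        err = ret;      /* but remember only the first error */
        }
        printf("err=%d\n", err);        /* -5, not whatever step 2 returned */
        return err ? 1 : 0;
}
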
6568 6709
6569/* 6710/*
@@ -6630,9 +6771,8 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
6630 ei->index_cnt = (u64)-1; 6771 ei->index_cnt = (u64)-1;
6631 ei->last_unlink_trans = 0; 6772 ei->last_unlink_trans = 0;
6632 6773
6633 spin_lock_init(&ei->accounting_lock);
6634 atomic_set(&ei->outstanding_extents, 0); 6774 atomic_set(&ei->outstanding_extents, 0);
6635 ei->reserved_extents = 0; 6775 atomic_set(&ei->reserved_extents, 0);
6636 6776
6637 ei->ordered_data_close = 0; 6777 ei->ordered_data_close = 0;
6638 ei->orphan_meta_reserved = 0; 6778 ei->orphan_meta_reserved = 0;
@@ -6668,7 +6808,7 @@ void btrfs_destroy_inode(struct inode *inode)
6668 WARN_ON(!list_empty(&inode->i_dentry)); 6808 WARN_ON(!list_empty(&inode->i_dentry));
6669 WARN_ON(inode->i_data.nrpages); 6809 WARN_ON(inode->i_data.nrpages);
6670 WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents)); 6810 WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents));
6671 WARN_ON(BTRFS_I(inode)->reserved_extents); 6811 WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents));
6672 6812
6673 /* 6813 /*
6674 * This can happen where we create an inode, but somebody else also 6814 * This can happen where we create an inode, but somebody else also
@@ -6760,6 +6900,8 @@ void btrfs_destroy_cachep(void)
6760 kmem_cache_destroy(btrfs_transaction_cachep); 6900 kmem_cache_destroy(btrfs_transaction_cachep);
6761 if (btrfs_path_cachep) 6901 if (btrfs_path_cachep)
6762 kmem_cache_destroy(btrfs_path_cachep); 6902 kmem_cache_destroy(btrfs_path_cachep);
6903 if (btrfs_free_space_cachep)
6904 kmem_cache_destroy(btrfs_free_space_cachep);
6763} 6905}
6764 6906
6765int btrfs_init_cachep(void) 6907int btrfs_init_cachep(void)
@@ -6788,6 +6930,12 @@ int btrfs_init_cachep(void)
6788 if (!btrfs_path_cachep) 6930 if (!btrfs_path_cachep)
6789 goto fail; 6931 goto fail;
6790 6932
6933 btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space_cache",
6934 sizeof(struct btrfs_free_space), 0,
6935 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
6936 if (!btrfs_free_space_cachep)
6937 goto fail;
6938
6791 return 0; 6939 return 0;
6792fail: 6940fail:
6793 btrfs_destroy_cachep(); 6941 btrfs_destroy_cachep();
@@ -6806,6 +6954,26 @@ static int btrfs_getattr(struct vfsmount *mnt,
6806 return 0; 6954 return 0;
6807} 6955}
6808 6956
6957/*
6958 * If a file is moved, it will inherit the COW and compression flags of the new
6959 * directory.
6960 */
6961static void fixup_inode_flags(struct inode *dir, struct inode *inode)
6962{
6963 struct btrfs_inode *b_dir = BTRFS_I(dir);
6964 struct btrfs_inode *b_inode = BTRFS_I(inode);
6965
6966 if (b_dir->flags & BTRFS_INODE_NODATACOW)
6967 b_inode->flags |= BTRFS_INODE_NODATACOW;
6968 else
6969 b_inode->flags &= ~BTRFS_INODE_NODATACOW;
6970
6971 if (b_dir->flags & BTRFS_INODE_COMPRESS)
6972 b_inode->flags |= BTRFS_INODE_COMPRESS;
6973 else
6974 b_inode->flags &= ~BTRFS_INODE_COMPRESS;
6975}
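
fixup_inode_flags() copies two bits from the destination directory wholesale: each is forced to the parent's state on the moved inode, set or cleared, never left as it was. In miniature, with illustrative flag values rather than the kernel's BTRFS_INODE_* bits:

#include <stdio.h>

#define FL_NODATACOW 0x1u   /* illustrative values, not the kernel's */
#define FL_COMPRESS  0x2u

static unsigned int inherit_flags(unsigned int dir, unsigned int inode)
{
        /* force each inherited bit to the directory's state */
        if (dir & FL_NODATACOW)
                inode |= FL_NODATACOW;
        else
                inode &= ~FL_NODATACOW;

        if (dir & FL_COMPRESS)
                inode |= FL_COMPRESS;
        else
                inode &= ~FL_COMPRESS;

        return inode;
}

int main(void)
{
        printf("%#x\n", inherit_flags(FL_COMPRESS, FL_NODATACOW)); /* 0x2 */
        return 0;
}
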
6976
6809static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, 6977static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
6810 struct inode *new_dir, struct dentry *new_dentry) 6978 struct inode *new_dir, struct dentry *new_dentry)
6811{ 6979{
@@ -6854,8 +7022,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
6854 * should cover the worst case number of items we'll modify. 7022 * should cover the worst case number of items we'll modify.
6855 */ 7023 */
6856 trans = btrfs_start_transaction(root, 20); 7024 trans = btrfs_start_transaction(root, 20);
6857 if (IS_ERR(trans)) 7025 if (IS_ERR(trans)) {
6858 return PTR_ERR(trans); 7026 ret = PTR_ERR(trans);
7027 goto out_notrans;
7028 }
6859 7029
6860 btrfs_set_trans_block_group(trans, new_dir); 7030 btrfs_set_trans_block_group(trans, new_dir);
6861 7031
@@ -6908,11 +7078,12 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
6908 old_dentry->d_name.name, 7078 old_dentry->d_name.name,
6909 old_dentry->d_name.len); 7079 old_dentry->d_name.len);
6910 } else { 7080 } else {
6911 btrfs_inc_nlink(old_dentry->d_inode); 7081 ret = __btrfs_unlink_inode(trans, root, old_dir,
6912 ret = btrfs_unlink_inode(trans, root, old_dir, 7082 old_dentry->d_inode,
6913 old_dentry->d_inode, 7083 old_dentry->d_name.name,
6914 old_dentry->d_name.name, 7084 old_dentry->d_name.len);
6915 old_dentry->d_name.len); 7085 if (!ret)
7086 ret = btrfs_update_inode(trans, root, old_inode);
6916 } 7087 }
6917 BUG_ON(ret); 7088 BUG_ON(ret);
6918 7089
@@ -6939,6 +7110,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
6939 } 7110 }
6940 } 7111 }
6941 7112
7113 fixup_inode_flags(new_dir, old_inode);
7114
6942 ret = btrfs_add_link(trans, new_dir, old_inode, 7115 ret = btrfs_add_link(trans, new_dir, old_inode,
6943 new_dentry->d_name.name, 7116 new_dentry->d_name.name,
6944 new_dentry->d_name.len, 0, index); 7117 new_dentry->d_name.len, 0, index);
@@ -6952,7 +7125,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
6952 } 7125 }
6953out_fail: 7126out_fail:
6954 btrfs_end_transaction_throttle(trans, root); 7127 btrfs_end_transaction_throttle(trans, root);
6955 7128out_notrans:
6956 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 7129 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
6957 up_read(&root->fs_info->subvol_sem); 7130 up_read(&root->fs_info->subvol_sem);
6958 7131
@@ -7340,7 +7513,6 @@ static const struct address_space_operations btrfs_aops = {
7340 .writepage = btrfs_writepage, 7513 .writepage = btrfs_writepage,
7341 .writepages = btrfs_writepages, 7514 .writepages = btrfs_writepages,
7342 .readpages = btrfs_readpages, 7515 .readpages = btrfs_readpages,
7343 .sync_page = block_sync_page,
7344 .direct_IO = btrfs_direct_IO, 7516 .direct_IO = btrfs_direct_IO,
7345 .invalidatepage = btrfs_invalidatepage, 7517 .invalidatepage = btrfs_invalidatepage,
7346 .releasepage = btrfs_releasepage, 7518 .releasepage = btrfs_releasepage,
@@ -7356,7 +7528,6 @@ static const struct address_space_operations btrfs_symlink_aops = {
7356}; 7528};
7357 7529
7358static const struct inode_operations btrfs_file_inode_operations = { 7530static const struct inode_operations btrfs_file_inode_operations = {
7359 .truncate = btrfs_truncate,
7360 .getattr = btrfs_getattr, 7531 .getattr = btrfs_getattr,
7361 .setattr = btrfs_setattr, 7532 .setattr = btrfs_setattr,
7362 .setxattr = btrfs_setxattr, 7533 .setxattr = btrfs_setxattr,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 5fdb2abc4fa7..ffb48d6c5433 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -40,6 +40,7 @@
40#include <linux/xattr.h> 40#include <linux/xattr.h>
41#include <linux/vmalloc.h> 41#include <linux/vmalloc.h>
42#include <linux/slab.h> 42#include <linux/slab.h>
43#include <linux/blkdev.h>
43#include "compat.h" 44#include "compat.h"
44#include "ctree.h" 45#include "ctree.h"
45#include "disk-io.h" 46#include "disk-io.h"
@@ -138,6 +139,24 @@ static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
138 return 0; 139 return 0;
139} 140}
140 141
142static int check_flags(unsigned int flags)
143{
144 if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
145 FS_NOATIME_FL | FS_NODUMP_FL | \
146 FS_SYNC_FL | FS_DIRSYNC_FL | \
147 FS_NOCOMP_FL | FS_COMPR_FL | \
148 FS_NOCOW_FL | FS_COW_FL))
149 return -EOPNOTSUPP;
150
151 if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
152 return -EINVAL;
153
154 if ((flags & FS_NOCOW_FL) && (flags & FS_COW_FL))
155 return -EINVAL;
156
157 return 0;
158}
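
check_flags() first rejects anything outside the supported mask, then the two mutually exclusive pairs. The same validation as a stand-alone function; the bit values here are placeholders, not the FS_*_FL constants from linux/fs.h:

#include <errno.h>
#include <stdio.h>

#define F_COMPR   0x01u
#define F_NOCOMP  0x02u
#define F_COW     0x04u
#define F_NOCOW   0x08u
#define F_SUPPORTED (F_COMPR | F_NOCOMP | F_COW | F_NOCOW)

static int check_flags(unsigned int flags)
{
        if (flags & ~F_SUPPORTED)
                return -EOPNOTSUPP;      /* unknown bit */
        if ((flags & F_NOCOMP) && (flags & F_COMPR))
                return -EINVAL;          /* compress xor nocompress */
        if ((flags & F_NOCOW) && (flags & F_COW))
                return -EINVAL;          /* cow xor nocow */
        return 0;
}

int main(void)
{
        printf("%d\n", check_flags(F_COMPR | F_NOCOMP)); /* -EINVAL */
        return 0;
}
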
159
141static int btrfs_ioctl_setflags(struct file *file, void __user *arg) 160static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
142{ 161{
143 struct inode *inode = file->f_path.dentry->d_inode; 162 struct inode *inode = file->f_path.dentry->d_inode;
@@ -153,12 +172,11 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
153 if (copy_from_user(&flags, arg, sizeof(flags))) 172 if (copy_from_user(&flags, arg, sizeof(flags)))
154 return -EFAULT; 173 return -EFAULT;
155 174
156 if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ 175 ret = check_flags(flags);
157 FS_NOATIME_FL | FS_NODUMP_FL | \ 176 if (ret)
158 FS_SYNC_FL | FS_DIRSYNC_FL)) 177 return ret;
159 return -EOPNOTSUPP;
160 178
161 if (!is_owner_or_cap(inode)) 179 if (!inode_owner_or_capable(inode))
162 return -EACCES; 180 return -EACCES;
163 181
164 mutex_lock(&inode->i_mutex); 182 mutex_lock(&inode->i_mutex);
@@ -201,6 +219,22 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
201 else 219 else
202 ip->flags &= ~BTRFS_INODE_DIRSYNC; 220 ip->flags &= ~BTRFS_INODE_DIRSYNC;
203 221
222 /*
223 * The COMPRESS flag can only be changed by users, while the NOCOMPRESS
224 * flag may be changed automatically if the compression code won't make
225 * things smaller.
226 */
227 if (flags & FS_NOCOMP_FL) {
228 ip->flags &= ~BTRFS_INODE_COMPRESS;
229 ip->flags |= BTRFS_INODE_NOCOMPRESS;
230 } else if (flags & FS_COMPR_FL) {
231 ip->flags |= BTRFS_INODE_COMPRESS;
232 ip->flags &= ~BTRFS_INODE_NOCOMPRESS;
233 }
234 if (flags & FS_NOCOW_FL)
235 ip->flags |= BTRFS_INODE_NODATACOW;
236 else if (flags & FS_COW_FL)
237 ip->flags &= ~BTRFS_INODE_NODATACOW;
204 238
205 trans = btrfs_join_transaction(root, 1); 239 trans = btrfs_join_transaction(root, 1);
206 BUG_ON(IS_ERR(trans)); 240 BUG_ON(IS_ERR(trans));
@@ -213,9 +247,11 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
213 btrfs_end_transaction(trans, root); 247 btrfs_end_transaction(trans, root);
214 248
215 mnt_drop_write(file->f_path.mnt); 249 mnt_drop_write(file->f_path.mnt);
250
251 ret = 0;
216 out_unlock: 252 out_unlock:
217 mutex_unlock(&inode->i_mutex); 253 mutex_unlock(&inode->i_mutex);
218 return 0; 254 return ret;
219} 255}
220 256
221static int btrfs_ioctl_getversion(struct file *file, int __user *arg) 257static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
@@ -225,6 +261,49 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
225 return put_user(inode->i_generation, arg); 261 return put_user(inode->i_generation, arg);
226} 262}
227 263
264static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
265{
266 struct btrfs_root *root = fdentry(file)->d_sb->s_fs_info;
267 struct btrfs_fs_info *fs_info = root->fs_info;
268 struct btrfs_device *device;
269 struct request_queue *q;
270 struct fstrim_range range;
271 u64 minlen = ULLONG_MAX;
272 u64 num_devices = 0;
273 int ret;
274
275 if (!capable(CAP_SYS_ADMIN))
276 return -EPERM;
277
278 mutex_lock(&fs_info->fs_devices->device_list_mutex);
279 list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
280 if (!device->bdev)
281 continue;
282 q = bdev_get_queue(device->bdev);
283 if (blk_queue_discard(q)) {
284 num_devices++;
285 minlen = min((u64)q->limits.discard_granularity,
286 minlen);
287 }
288 }
289 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
290 if (!num_devices)
291 return -EOPNOTSUPP;
292
293 if (copy_from_user(&range, arg, sizeof(range)))
294 return -EFAULT;
295
296 range.minlen = max(range.minlen, minlen);
297 ret = btrfs_trim_fs(root, &range);
298 if (ret < 0)
299 return ret;
300
301 if (copy_to_user(arg, &range, sizeof(range)))
302 return -EFAULT;
303
304 return 0;
305}
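
btrfs_ioctl_fitrim() walks every device, counts those whose queue supports discard, and takes the smallest discard granularity across them; the user's requested minlen is then clamped up to that floor. The reduction, as a hedged sketch over a plain array standing in for the device list:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* 0 = device without discard support (skipped) */
        uint64_t granularity[] = { 4096, 0, 512, 1048576 };
        uint64_t minlen = UINT64_MAX;
        uint64_t requested = 256;       /* user-supplied range.minlen */
        int i, num_devices = 0;

        for (i = 0; i < 4; i++) {
                if (!granularity[i])
                        continue;
                num_devices++;
                if (granularity[i] < minlen)
                        minlen = granularity[i];
        }
        if (!num_devices)
                return 1;               /* -EOPNOTSUPP in the ioctl */

        if (requested < minlen)
                requested = minlen;     /* range.minlen = max(...) */
        printf("minlen=%llu\n", (unsigned long long)requested); /* 512 */
        return 0;
}
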
306
228static noinline int create_subvol(struct btrfs_root *root, 307static noinline int create_subvol(struct btrfs_root *root,
229 struct dentry *dentry, 308 struct dentry *dentry,
230 char *name, int namelen, 309 char *name, int namelen,
@@ -294,6 +373,10 @@ static noinline int create_subvol(struct btrfs_root *root,
294 inode_item->nbytes = cpu_to_le64(root->leafsize); 373 inode_item->nbytes = cpu_to_le64(root->leafsize);
295 inode_item->mode = cpu_to_le32(S_IFDIR | 0755); 374 inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
296 375
376 root_item.flags = 0;
377 root_item.byte_limit = 0;
378 inode_item->flags = cpu_to_le64(BTRFS_INODE_ROOT_ITEM_INIT);
379
297 btrfs_set_root_bytenr(&root_item, leaf->start); 380 btrfs_set_root_bytenr(&root_item, leaf->start);
298 btrfs_set_root_generation(&root_item, trans->transid); 381 btrfs_set_root_generation(&root_item, trans->transid);
299 btrfs_set_root_level(&root_item, 0); 382 btrfs_set_root_level(&root_item, 0);
@@ -409,7 +492,9 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
409 if (ret) 492 if (ret)
410 goto fail; 493 goto fail;
411 494
412 btrfs_orphan_cleanup(pending_snapshot->snap); 495 ret = btrfs_orphan_cleanup(pending_snapshot->snap);
496 if (ret)
497 goto fail;
413 498
414 parent = dget_parent(dentry); 499 parent = dget_parent(dentry);
415 inode = btrfs_lookup_dentry(parent->d_inode, dentry); 500 inode = btrfs_lookup_dentry(parent->d_inode, dentry);
@@ -1077,7 +1162,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1077 if (flags & ~BTRFS_SUBVOL_RDONLY) 1162 if (flags & ~BTRFS_SUBVOL_RDONLY)
1078 return -EOPNOTSUPP; 1163 return -EOPNOTSUPP;
1079 1164
1080 if (!is_owner_or_cap(inode)) 1165 if (!inode_owner_or_capable(inode))
1081 return -EACCES; 1166 return -EACCES;
1082 1167
1083 down_write(&root->fs_info->subvol_sem); 1168 down_write(&root->fs_info->subvol_sem);
@@ -2202,7 +2287,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2202 struct btrfs_ioctl_space_info space; 2287 struct btrfs_ioctl_space_info space;
2203 struct btrfs_ioctl_space_info *dest; 2288 struct btrfs_ioctl_space_info *dest;
2204 struct btrfs_ioctl_space_info *dest_orig; 2289 struct btrfs_ioctl_space_info *dest_orig;
2205 struct btrfs_ioctl_space_info *user_dest; 2290 struct btrfs_ioctl_space_info __user *user_dest;
2206 struct btrfs_space_info *info; 2291 struct btrfs_space_info *info;
2207 u64 types[] = {BTRFS_BLOCK_GROUP_DATA, 2292 u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
2208 BTRFS_BLOCK_GROUP_SYSTEM, 2293 BTRFS_BLOCK_GROUP_SYSTEM,
@@ -2348,12 +2433,17 @@ static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp
2348 struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root; 2433 struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
2349 struct btrfs_trans_handle *trans; 2434 struct btrfs_trans_handle *trans;
2350 u64 transid; 2435 u64 transid;
2436 int ret;
2351 2437
2352 trans = btrfs_start_transaction(root, 0); 2438 trans = btrfs_start_transaction(root, 0);
2353 if (IS_ERR(trans)) 2439 if (IS_ERR(trans))
2354 return PTR_ERR(trans); 2440 return PTR_ERR(trans);
2355 transid = trans->transid; 2441 transid = trans->transid;
2356 btrfs_commit_transaction_async(trans, root, 0); 2442 ret = btrfs_commit_transaction_async(trans, root, 0);
2443 if (ret) {
2444 btrfs_end_transaction(trans, root);
2445 return ret;
2446 }
2357 2447
2358 if (argp) 2448 if (argp)
2359 if (copy_to_user(argp, &transid, sizeof(transid))) 2449 if (copy_to_user(argp, &transid, sizeof(transid)))
@@ -2388,6 +2478,8 @@ long btrfs_ioctl(struct file *file, unsigned int
2388 return btrfs_ioctl_setflags(file, argp); 2478 return btrfs_ioctl_setflags(file, argp);
2389 case FS_IOC_GETVERSION: 2479 case FS_IOC_GETVERSION:
2390 return btrfs_ioctl_getversion(file, argp); 2480 return btrfs_ioctl_getversion(file, argp);
2481 case FITRIM:
2482 return btrfs_ioctl_fitrim(file, argp);
2391 case BTRFS_IOC_SNAP_CREATE: 2483 case BTRFS_IOC_SNAP_CREATE:
2392 return btrfs_ioctl_snap_create(file, argp, 0); 2484 return btrfs_ioctl_snap_create(file, argp, 0);
2393 case BTRFS_IOC_SNAP_CREATE_V2: 2485 case BTRFS_IOC_SNAP_CREATE_V2:
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 083a55477375..a1c940425307 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -202,6 +202,8 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
202 INIT_LIST_HEAD(&entry->list); 202 INIT_LIST_HEAD(&entry->list);
203 INIT_LIST_HEAD(&entry->root_extent_list); 203 INIT_LIST_HEAD(&entry->root_extent_list);
204 204
205 trace_btrfs_ordered_extent_add(inode, entry);
206
205 spin_lock(&tree->lock); 207 spin_lock(&tree->lock);
206 node = tree_insert(&tree->tree, file_offset, 208 node = tree_insert(&tree->tree, file_offset,
207 &entry->rb_node); 209 &entry->rb_node);
@@ -387,6 +389,8 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
387 struct list_head *cur; 389 struct list_head *cur;
388 struct btrfs_ordered_sum *sum; 390 struct btrfs_ordered_sum *sum;
389 391
392 trace_btrfs_ordered_extent_put(entry->inode, entry);
393
390 if (atomic_dec_and_test(&entry->refs)) { 394 if (atomic_dec_and_test(&entry->refs)) {
391 while (!list_empty(&entry->list)) { 395 while (!list_empty(&entry->list)) {
392 cur = entry->list.next; 396 cur = entry->list.next;
@@ -420,6 +424,8 @@ static int __btrfs_remove_ordered_extent(struct inode *inode,
420 spin_lock(&root->fs_info->ordered_extent_lock); 424 spin_lock(&root->fs_info->ordered_extent_lock);
421 list_del_init(&entry->root_extent_list); 425 list_del_init(&entry->root_extent_list);
422 426
427 trace_btrfs_ordered_extent_remove(inode, entry);
428
423 /* 429 /*
424 * we have no more ordered extents for this inode and 430 * we have no more ordered extents for this inode and
425 * no dirty pages. We can safely remove it from the 431 * no dirty pages. We can safely remove it from the
@@ -585,6 +591,8 @@ void btrfs_start_ordered_extent(struct inode *inode,
585 u64 start = entry->file_offset; 591 u64 start = entry->file_offset;
586 u64 end = start + entry->len - 1; 592 u64 end = start + entry->len - 1;
587 593
594 trace_btrfs_ordered_extent_start(inode, entry);
595
588 /* 596 /*
589 * pages in the range can be dirty, clean or writeback. We 597 * pages in the range can be dirty, clean or writeback. We
590 * start IO on any dirty ones so the wait doesn't stall waiting 598 * start IO on any dirty ones so the wait doesn't stall waiting
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 31ade5802ae8..199a80134312 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1724,6 +1724,7 @@ again:
1724 1724
1725 eb = read_tree_block(dest, old_bytenr, blocksize, 1725 eb = read_tree_block(dest, old_bytenr, blocksize,
1726 old_ptr_gen); 1726 old_ptr_gen);
1727 BUG_ON(!eb);
1727 btrfs_tree_lock(eb); 1728 btrfs_tree_lock(eb);
1728 if (cow) { 1729 if (cow) {
1729 ret = btrfs_cow_block(trans, dest, eb, parent, 1730 ret = btrfs_cow_block(trans, dest, eb, parent,
@@ -2345,7 +2346,7 @@ struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans,
2345 root = next->root; 2346 root = next->root;
2346 BUG_ON(!root); 2347 BUG_ON(!root);
2347 2348
2348 /* no other choice for non-refernce counted tree */ 2349 /* no other choice for non-reference counted tree */
2349 if (!root->ref_cows) 2350 if (!root->ref_cows)
2350 return root; 2351 return root;
2351 2352
@@ -2513,6 +2514,10 @@ static int do_relocation(struct btrfs_trans_handle *trans,
2513 blocksize = btrfs_level_size(root, node->level); 2514 blocksize = btrfs_level_size(root, node->level);
2514 generation = btrfs_node_ptr_generation(upper->eb, slot); 2515 generation = btrfs_node_ptr_generation(upper->eb, slot);
2515 eb = read_tree_block(root, bytenr, blocksize, generation); 2516 eb = read_tree_block(root, bytenr, blocksize, generation);
2517 if (!eb) {
2518 err = -EIO;
2519 goto next;
2520 }
2516 btrfs_tree_lock(eb); 2521 btrfs_tree_lock(eb);
2517 btrfs_set_lock_blocking(eb); 2522 btrfs_set_lock_blocking(eb);
2518 2523
@@ -2670,6 +2675,7 @@ static int get_tree_block_key(struct reloc_control *rc,
2670 BUG_ON(block->key_ready); 2675 BUG_ON(block->key_ready);
2671 eb = read_tree_block(rc->extent_root, block->bytenr, 2676 eb = read_tree_block(rc->extent_root, block->bytenr,
2672 block->key.objectid, block->key.offset); 2677 block->key.objectid, block->key.offset);
2678 BUG_ON(!eb);
2673 WARN_ON(btrfs_header_level(eb) != block->level); 2679 WARN_ON(btrfs_header_level(eb) != block->level);
2674 if (block->level == 0) 2680 if (block->level == 0)
2675 btrfs_item_key_to_cpu(eb, &block->key, 0); 2681 btrfs_item_key_to_cpu(eb, &block->key, 0);
@@ -4209,7 +4215,7 @@ out:
4209 if (IS_ERR(fs_root)) 4215 if (IS_ERR(fs_root))
4210 err = PTR_ERR(fs_root); 4216 err = PTR_ERR(fs_root);
4211 else 4217 else
4212 btrfs_orphan_cleanup(fs_root); 4218 err = btrfs_orphan_cleanup(fs_root);
4213 } 4219 }
4214 return err; 4220 return err;
4215} 4221}
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 6a1086e83ffc..6928bff62daa 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -88,7 +88,8 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
88 search_key.offset = (u64)-1; 88 search_key.offset = (u64)-1;
89 89
90 path = btrfs_alloc_path(); 90 path = btrfs_alloc_path();
91 BUG_ON(!path); 91 if (!path)
92 return -ENOMEM;
92 ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); 93 ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
93 if (ret < 0) 94 if (ret < 0)
94 goto out; 95 goto out;
@@ -332,7 +333,8 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
332 struct extent_buffer *leaf; 333 struct extent_buffer *leaf;
333 334
334 path = btrfs_alloc_path(); 335 path = btrfs_alloc_path();
335 BUG_ON(!path); 336 if (!path)
337 return -ENOMEM;
336 ret = btrfs_search_slot(trans, root, key, path, -1, 1); 338 ret = btrfs_search_slot(trans, root, key, path, -1, 1);
337 if (ret < 0) 339 if (ret < 0)
338 goto out; 340 goto out;
@@ -471,3 +473,21 @@ again:
471 btrfs_free_path(path); 473 btrfs_free_path(path);
472 return 0; 474 return 0;
473} 475}
476
477/*
478 * Old btrfs versions forget to init root_item->flags and root_item->byte_limit
479 * for subvolumes. To work around this problem, we steal a bit from
480 * root_item->inode_item->flags, and use it to indicate if those fields
481 * have been properly initialized.
482 */
483void btrfs_check_and_init_root_item(struct btrfs_root_item *root_item)
484{
485 u64 inode_flags = le64_to_cpu(root_item->inode.flags);
486
487 if (!(inode_flags & BTRFS_INODE_ROOT_ITEM_INIT)) {
488 inode_flags |= BTRFS_INODE_ROOT_ITEM_INIT;
489 root_item->inode.flags = cpu_to_le64(inode_flags);
490 root_item->flags = 0;
491 root_item->byte_limit = 0;
492 }
493}
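
btrfs_check_and_init_root_item() works around old images by borrowing one bit in the on-disk (little-endian) inode flags: if the INIT bit is absent, the two uninitialized fields are zeroed and the bit is set so the fixup runs only once. A user-space sketch; the bit value is assumed for illustration, and the le64 helpers are identity stand-ins for a little-endian host:

#include <stdint.h>
#include <stdio.h>

#define ROOT_ITEM_INIT (1ULL << 31)   /* assumed bit, for illustration */

struct root_item {
        uint64_t inode_flags;   /* stored little-endian on disk */
        uint64_t flags;
        uint64_t byte_limit;
};

/* Stand-ins for the kernel's le64_to_cpu()/cpu_to_le64(); identity on LE. */
static uint64_t le64_to_cpu(uint64_t v) { return v; }
static uint64_t cpu_to_le64(uint64_t v) { return v; }

static void check_and_init(struct root_item *ri)
{
        uint64_t f = le64_to_cpu(ri->inode_flags);

        if (!(f & ROOT_ITEM_INIT)) {
                f |= ROOT_ITEM_INIT;             /* mark as fixed up */
                ri->inode_flags = cpu_to_le64(f);
                ri->flags = 0;                   /* old mkfs left garbage */
                ri->byte_limit = 0;
        }
}

int main(void)
{
        struct root_item ri = { 0, 0xdeadbeef, 42 };

        check_and_init(&ri);
        printf("%llu %llu\n", (unsigned long long)ri.flags,
               (unsigned long long)ri.byte_limit); /* 0 0 */
        return 0;
}
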
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index d39a9895d932..0ac712efcdf2 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -52,6 +52,9 @@
52#include "export.h" 52#include "export.h"
53#include "compression.h" 53#include "compression.h"
54 54
55#define CREATE_TRACE_POINTS
56#include <trace/events/btrfs.h>
57
55static const struct super_operations btrfs_super_ops; 58static const struct super_operations btrfs_super_ops;
56 59
57static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno, 60static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno,
@@ -156,7 +159,7 @@ enum {
156 Opt_compress_type, Opt_compress_force, Opt_compress_force_type, 159 Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
157 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, 160 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
158 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, 161 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
159 Opt_enospc_debug, Opt_err, 162 Opt_enospc_debug, Opt_subvolrootid, Opt_err,
160}; 163};
161 164
162static match_table_t tokens = { 165static match_table_t tokens = {
@@ -186,6 +189,7 @@ static match_table_t tokens = {
186 {Opt_clear_cache, "clear_cache"}, 189 {Opt_clear_cache, "clear_cache"},
187 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, 190 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
188 {Opt_enospc_debug, "enospc_debug"}, 191 {Opt_enospc_debug, "enospc_debug"},
192 {Opt_subvolrootid, "subvolrootid=%d"},
189 {Opt_err, NULL}, 193 {Opt_err, NULL},
190}; 194};
191 195
@@ -229,6 +233,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
229 break; 233 break;
230 case Opt_subvol: 234 case Opt_subvol:
231 case Opt_subvolid: 235 case Opt_subvolid:
236 case Opt_subvolrootid:
232 case Opt_device: 237 case Opt_device:
233 /* 238 /*
234 * These are parsed by btrfs_parse_early_options 239 * These are parsed by btrfs_parse_early_options
@@ -385,7 +390,7 @@ out:
385 */ 390 */
386static int btrfs_parse_early_options(const char *options, fmode_t flags, 391static int btrfs_parse_early_options(const char *options, fmode_t flags,
387 void *holder, char **subvol_name, u64 *subvol_objectid, 392 void *holder, char **subvol_name, u64 *subvol_objectid,
388 struct btrfs_fs_devices **fs_devices) 393 u64 *subvol_rootid, struct btrfs_fs_devices **fs_devices)
389{ 394{
390 substring_t args[MAX_OPT_ARGS]; 395 substring_t args[MAX_OPT_ARGS];
391 char *opts, *orig, *p; 396 char *opts, *orig, *p;
@@ -426,6 +431,18 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
426 *subvol_objectid = intarg; 431 *subvol_objectid = intarg;
427 } 432 }
428 break; 433 break;
434 case Opt_subvolrootid:
435 intarg = 0;
436 error = match_int(&args[0], &intarg);
437 if (!error) {
438 /* we want the original fs_tree */
439 if (!intarg)
440 *subvol_rootid =
441 BTRFS_FS_TREE_OBJECTID;
442 else
443 *subvol_rootid = intarg;
444 }
445 break;
429 case Opt_device: 446 case Opt_device:
430 error = btrfs_scan_one_device(match_strdup(&args[0]), 447 error = btrfs_scan_one_device(match_strdup(&args[0]),
431 flags, holder, fs_devices); 448 flags, holder, fs_devices);
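
Editor's note: Opt_subvolrootid maps an explicit 0 to the original fs tree before handing the id to the mount path, and a subvolume name given alongside it is then resolved relative to that tree. A hedged sketch of the defaulting rule in plain C, assuming BTRFS_FS_TREE_OBJECTID is 5 as in the on-disk format:

#include <stdint.h>
#include <stdlib.h>

#define FS_TREE_OBJECTID 5ULL	/* assumed value of BTRFS_FS_TREE_OBJECTID */

/* Parse the value of "subvolrootid=<n>"; n == 0 selects the fs tree. */
static uint64_t parse_subvol_rootid(const char *arg)
{
	uint64_t id = strtoull(arg, NULL, 10);

	return id ? id : FS_TREE_OBJECTID;
}
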
@@ -620,6 +637,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
620 struct btrfs_root *root = btrfs_sb(sb); 637 struct btrfs_root *root = btrfs_sb(sb);
621 int ret; 638 int ret;
622 639
640 trace_btrfs_sync_fs(wait);
641
623 if (!wait) { 642 if (!wait) {
624 filemap_flush(root->fs_info->btree_inode->i_mapping); 643 filemap_flush(root->fs_info->btree_inode->i_mapping);
625 return 0; 644 return 0;
@@ -639,6 +658,7 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
639{ 658{
640 struct btrfs_root *root = btrfs_sb(vfs->mnt_sb); 659 struct btrfs_root *root = btrfs_sb(vfs->mnt_sb);
641 struct btrfs_fs_info *info = root->fs_info; 660 struct btrfs_fs_info *info = root->fs_info;
661 char *compress_type;
642 662
643 if (btrfs_test_opt(root, DEGRADED)) 663 if (btrfs_test_opt(root, DEGRADED))
644 seq_puts(seq, ",degraded"); 664 seq_puts(seq, ",degraded");
@@ -657,8 +677,16 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
657 if (info->thread_pool_size != min_t(unsigned long, 677 if (info->thread_pool_size != min_t(unsigned long,
658 num_online_cpus() + 2, 8)) 678 num_online_cpus() + 2, 8))
659 seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); 679 seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
660 if (btrfs_test_opt(root, COMPRESS)) 680 if (btrfs_test_opt(root, COMPRESS)) {
661 seq_puts(seq, ",compress"); 681 if (info->compress_type == BTRFS_COMPRESS_ZLIB)
682 compress_type = "zlib";
683 else
684 compress_type = "lzo";
685 if (btrfs_test_opt(root, FORCE_COMPRESS))
686 seq_printf(seq, ",compress-force=%s", compress_type);
687 else
688 seq_printf(seq, ",compress=%s", compress_type);
689 }
662 if (btrfs_test_opt(root, NOSSD)) 690 if (btrfs_test_opt(root, NOSSD))
663 seq_puts(seq, ",nossd"); 691 seq_puts(seq, ",nossd");
664 if (btrfs_test_opt(root, SSD_SPREAD)) 692 if (btrfs_test_opt(root, SSD_SPREAD))
@@ -673,6 +701,12 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
673 seq_puts(seq, ",discard"); 701 seq_puts(seq, ",discard");
674 if (!(root->fs_info->sb->s_flags & MS_POSIXACL)) 702 if (!(root->fs_info->sb->s_flags & MS_POSIXACL))
675 seq_puts(seq, ",noacl"); 703 seq_puts(seq, ",noacl");
704 if (btrfs_test_opt(root, SPACE_CACHE))
705 seq_puts(seq, ",space_cache");
706 if (btrfs_test_opt(root, CLEAR_CACHE))
707 seq_puts(seq, ",clear_cache");
708 if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
709 seq_puts(seq, ",user_subvol_rm_allowed");
676 return 0; 710 return 0;
677} 711}
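
Editor's note: with this change /proc/mounts reflects the compression algorithm actually in effect rather than a bare ",compress", plus the previously unreported cache and subvolume options. The emission logic, reduced to a sketch with stdio standing in for seq_printf:

#include <stdio.h>

enum compress_type { COMPRESS_ZLIB, COMPRESS_LZO };

static void show_compress(FILE *seq, int compress, int force,
			  enum compress_type type)
{
	const char *name = (type == COMPRESS_ZLIB) ? "zlib" : "lzo";

	if (!compress)
		return;
	if (force)
		fprintf(seq, ",compress-force=%s", name);
	else
		fprintf(seq, ",compress=%s", name);
}
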
678 712
@@ -716,6 +750,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
716 fmode_t mode = FMODE_READ; 750 fmode_t mode = FMODE_READ;
717 char *subvol_name = NULL; 751 char *subvol_name = NULL;
718 u64 subvol_objectid = 0; 752 u64 subvol_objectid = 0;
753 u64 subvol_rootid = 0;
719 int error = 0; 754 int error = 0;
720 755
721 if (!(flags & MS_RDONLY)) 756 if (!(flags & MS_RDONLY))
@@ -723,7 +758,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
723 758
724 error = btrfs_parse_early_options(data, mode, fs_type, 759 error = btrfs_parse_early_options(data, mode, fs_type,
725 &subvol_name, &subvol_objectid, 760 &subvol_name, &subvol_objectid,
726 &fs_devices); 761 &subvol_rootid, &fs_devices);
727 if (error) 762 if (error)
728 return ERR_PTR(error); 763 return ERR_PTR(error);
729 764
@@ -787,15 +822,17 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
787 s->s_flags |= MS_ACTIVE; 822 s->s_flags |= MS_ACTIVE;
788 } 823 }
789 824
790 root = get_default_root(s, subvol_objectid);
791 if (IS_ERR(root)) {
792 error = PTR_ERR(root);
793 deactivate_locked_super(s);
794 goto error_free_subvol_name;
795 }
796 /* if they gave us a subvolume name bind mount into that */ 825 /* if they gave us a subvolume name bind mount into that */
797 if (strcmp(subvol_name, ".")) { 826 if (strcmp(subvol_name, ".")) {
798 struct dentry *new_root; 827 struct dentry *new_root;
828
829 root = get_default_root(s, subvol_rootid);
830 if (IS_ERR(root)) {
831 error = PTR_ERR(root);
832 deactivate_locked_super(s);
833 goto error_free_subvol_name;
834 }
835
799 mutex_lock(&root->d_inode->i_mutex); 836 mutex_lock(&root->d_inode->i_mutex);
800 new_root = lookup_one_len(subvol_name, root, 837 new_root = lookup_one_len(subvol_name, root,
801 strlen(subvol_name)); 838 strlen(subvol_name));
@@ -816,6 +853,13 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
816 } 853 }
817 dput(root); 854 dput(root);
818 root = new_root; 855 root = new_root;
856 } else {
857 root = get_default_root(s, subvol_objectid);
858 if (IS_ERR(root)) {
859 error = PTR_ERR(root);
860 deactivate_locked_super(s);
861 goto error_free_subvol_name;
862 }
819 } 863 }
820 864
821 kfree(subvol_name); 865 kfree(subvol_name);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 3d73c8d93bbb..c571734d5e5a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -32,10 +32,8 @@
32 32
33static noinline void put_transaction(struct btrfs_transaction *transaction) 33static noinline void put_transaction(struct btrfs_transaction *transaction)
34{ 34{
35 WARN_ON(transaction->use_count == 0); 35 WARN_ON(atomic_read(&transaction->use_count) == 0);
36 transaction->use_count--; 36 if (atomic_dec_and_test(&transaction->use_count)) {
37 if (transaction->use_count == 0) {
38 list_del_init(&transaction->list);
39 memset(transaction, 0, sizeof(*transaction)); 37 memset(transaction, 0, sizeof(*transaction));
40 kmem_cache_free(btrfs_transaction_cachep, transaction); 38 kmem_cache_free(btrfs_transaction_cachep, transaction);
41 } 39 }
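
Editor's note: the use_count conversion is the standard atomic refcount-release idiom — the decrement and the zero test become one atomic operation, so a lock no longer has to serialize them. A runnable C11 sketch of the same idiom; atomic_fetch_sub returning 1 here corresponds to the kernel's atomic_dec_and_test returning true:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct transaction {
	atomic_int use_count;
	/* ... payload ... */
};

static void put_transaction(struct transaction *t)
{
	/* fetch_sub returns the value *before* the decrement, so the
	 * caller that drops the count from 1 to 0 does the freeing */
	if (atomic_fetch_sub(&t->use_count, 1) == 1) {
		free(t);
		printf("freed\n");
	}
}

int main(void)
{
	struct transaction *t = malloc(sizeof(*t));

	atomic_init(&t->use_count, 2);
	put_transaction(t);	/* 2 -> 1: still referenced */
	put_transaction(t);	/* 1 -> 0: freed here */
	return 0;
}
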
@@ -57,16 +55,17 @@ static noinline int join_transaction(struct btrfs_root *root)
57 if (!cur_trans) { 55 if (!cur_trans) {
58 cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, 56 cur_trans = kmem_cache_alloc(btrfs_transaction_cachep,
59 GFP_NOFS); 57 GFP_NOFS);
60 BUG_ON(!cur_trans); 58 if (!cur_trans)
59 return -ENOMEM;
61 root->fs_info->generation++; 60 root->fs_info->generation++;
62 cur_trans->num_writers = 1; 61 atomic_set(&cur_trans->num_writers, 1);
63 cur_trans->num_joined = 0; 62 cur_trans->num_joined = 0;
64 cur_trans->transid = root->fs_info->generation; 63 cur_trans->transid = root->fs_info->generation;
65 init_waitqueue_head(&cur_trans->writer_wait); 64 init_waitqueue_head(&cur_trans->writer_wait);
66 init_waitqueue_head(&cur_trans->commit_wait); 65 init_waitqueue_head(&cur_trans->commit_wait);
67 cur_trans->in_commit = 0; 66 cur_trans->in_commit = 0;
68 cur_trans->blocked = 0; 67 cur_trans->blocked = 0;
69 cur_trans->use_count = 1; 68 atomic_set(&cur_trans->use_count, 1);
70 cur_trans->commit_done = 0; 69 cur_trans->commit_done = 0;
71 cur_trans->start_time = get_seconds(); 70 cur_trans->start_time = get_seconds();
72 71
@@ -87,7 +86,7 @@ static noinline int join_transaction(struct btrfs_root *root)
87 root->fs_info->running_transaction = cur_trans; 86 root->fs_info->running_transaction = cur_trans;
88 spin_unlock(&root->fs_info->new_trans_lock); 87 spin_unlock(&root->fs_info->new_trans_lock);
89 } else { 88 } else {
90 cur_trans->num_writers++; 89 atomic_inc(&cur_trans->num_writers);
91 cur_trans->num_joined++; 90 cur_trans->num_joined++;
92 } 91 }
93 92
@@ -144,7 +143,7 @@ static void wait_current_trans(struct btrfs_root *root)
144 cur_trans = root->fs_info->running_transaction; 143 cur_trans = root->fs_info->running_transaction;
145 if (cur_trans && cur_trans->blocked) { 144 if (cur_trans && cur_trans->blocked) {
146 DEFINE_WAIT(wait); 145 DEFINE_WAIT(wait);
147 cur_trans->use_count++; 146 atomic_inc(&cur_trans->use_count);
148 while (1) { 147 while (1) {
149 prepare_to_wait(&root->fs_info->transaction_wait, &wait, 148 prepare_to_wait(&root->fs_info->transaction_wait, &wait,
150 TASK_UNINTERRUPTIBLE); 149 TASK_UNINTERRUPTIBLE);
@@ -180,6 +179,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
180{ 179{
181 struct btrfs_trans_handle *h; 180 struct btrfs_trans_handle *h;
182 struct btrfs_transaction *cur_trans; 181 struct btrfs_transaction *cur_trans;
182 int retries = 0;
183 int ret; 183 int ret;
184 184
185 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) 185 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
@@ -195,10 +195,15 @@ again:
195 wait_current_trans(root); 195 wait_current_trans(root);
196 196
197 ret = join_transaction(root); 197 ret = join_transaction(root);
198 BUG_ON(ret); 198 if (ret < 0) {
199 kmem_cache_free(btrfs_trans_handle_cachep, h);
200 if (type != TRANS_JOIN_NOLOCK)
201 mutex_unlock(&root->fs_info->trans_mutex);
202 return ERR_PTR(ret);
203 }
199 204
200 cur_trans = root->fs_info->running_transaction; 205 cur_trans = root->fs_info->running_transaction;
201 cur_trans->use_count++; 206 atomic_inc(&cur_trans->use_count);
202 if (type != TRANS_JOIN_NOLOCK) 207 if (type != TRANS_JOIN_NOLOCK)
203 mutex_unlock(&root->fs_info->trans_mutex); 208 mutex_unlock(&root->fs_info->trans_mutex);
204 209
@@ -218,10 +223,18 @@ again:
218 223
219 if (num_items > 0) { 224 if (num_items > 0) {
220 ret = btrfs_trans_reserve_metadata(h, root, num_items); 225 ret = btrfs_trans_reserve_metadata(h, root, num_items);
221 if (ret == -EAGAIN) { 226 if (ret == -EAGAIN && !retries) {
227 retries++;
222 btrfs_commit_transaction(h, root); 228 btrfs_commit_transaction(h, root);
223 goto again; 229 goto again;
230 } else if (ret == -EAGAIN) {
231 /*
232 * We have already retried and got EAGAIN, so really we
233 * don't have space, so set ret to -ENOSPC.
234 */
235 ret = -ENOSPC;
224 } 236 }
237
225 if (ret < 0) { 238 if (ret < 0) {
226 btrfs_end_transaction(h, root); 239 btrfs_end_transaction(h, root);
227 return ERR_PTR(ret); 240 return ERR_PTR(ret);
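
Editor's note: start_transaction now retries the metadata reservation exactly once after forcing a commit, and converts a second EAGAIN into a hard -ENOSPC. The control flow, reduced to a sketch with a stub standing in for btrfs_trans_reserve_metadata():

#include <errno.h>

/* stub standing in for btrfs_trans_reserve_metadata() */
static int reserve(int attempt)
{
	return attempt < 2 ? -EAGAIN : 0;	/* illustrative behavior */
}

static int reserve_with_one_retry(void)
{
	int retries = 0;
	int ret;

again:
	ret = reserve(retries + 1);
	if (ret == -EAGAIN && !retries) {
		retries++;
		/* commit the running transaction to free space, retry once */
		goto again;
	} else if (ret == -EAGAIN) {
		ret = -ENOSPC;	/* already retried: really out of space */
	}
	return ret;
}
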
@@ -321,7 +334,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
321 goto out_unlock; /* nothing committing|committed */ 334 goto out_unlock; /* nothing committing|committed */
322 } 335 }
323 336
324 cur_trans->use_count++; 337 atomic_inc(&cur_trans->use_count);
325 mutex_unlock(&root->fs_info->trans_mutex); 338 mutex_unlock(&root->fs_info->trans_mutex);
326 339
327 wait_for_commit(root, cur_trans); 340 wait_for_commit(root, cur_trans);
@@ -451,18 +464,14 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
451 wake_up_process(info->transaction_kthread); 464 wake_up_process(info->transaction_kthread);
452 } 465 }
453 466
454 if (lock)
455 mutex_lock(&info->trans_mutex);
456 WARN_ON(cur_trans != info->running_transaction); 467 WARN_ON(cur_trans != info->running_transaction);
457 WARN_ON(cur_trans->num_writers < 1); 468 WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
458 cur_trans->num_writers--; 469 atomic_dec(&cur_trans->num_writers);
459 470
460 smp_mb(); 471 smp_mb();
461 if (waitqueue_active(&cur_trans->writer_wait)) 472 if (waitqueue_active(&cur_trans->writer_wait))
462 wake_up(&cur_trans->writer_wait); 473 wake_up(&cur_trans->writer_wait);
463 put_transaction(cur_trans); 474 put_transaction(cur_trans);
464 if (lock)
465 mutex_unlock(&info->trans_mutex);
466 475
467 if (current->journal_info == trans) 476 if (current->journal_info == trans)
468 current->journal_info = NULL; 477 current->journal_info = NULL;
@@ -970,6 +979,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
970 record_root_in_trans(trans, root); 979 record_root_in_trans(trans, root);
971 btrfs_set_root_last_snapshot(&root->root_item, trans->transid); 980 btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
972 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); 981 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
982 btrfs_check_and_init_root_item(new_root_item);
973 983
974 root_flags = btrfs_root_flags(new_root_item); 984 root_flags = btrfs_root_flags(new_root_item);
975 if (pending->readonly) 985 if (pending->readonly)
@@ -1156,7 +1166,8 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1156 struct btrfs_transaction *cur_trans; 1166 struct btrfs_transaction *cur_trans;
1157 1167
1158 ac = kmalloc(sizeof(*ac), GFP_NOFS); 1168 ac = kmalloc(sizeof(*ac), GFP_NOFS);
1159 BUG_ON(!ac); 1169 if (!ac)
1170 return -ENOMEM;
1160 1171
1161 INIT_DELAYED_WORK(&ac->work, do_async_commit); 1172 INIT_DELAYED_WORK(&ac->work, do_async_commit);
1162 ac->root = root; 1173 ac->root = root;
@@ -1170,7 +1181,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1170 /* take transaction reference */ 1181 /* take transaction reference */
1171 mutex_lock(&root->fs_info->trans_mutex); 1182 mutex_lock(&root->fs_info->trans_mutex);
1172 cur_trans = trans->transaction; 1183 cur_trans = trans->transaction;
1173 cur_trans->use_count++; 1184 atomic_inc(&cur_trans->use_count);
1174 mutex_unlock(&root->fs_info->trans_mutex); 1185 mutex_unlock(&root->fs_info->trans_mutex);
1175 1186
1176 btrfs_end_transaction(trans, root); 1187 btrfs_end_transaction(trans, root);
@@ -1229,7 +1240,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1229 1240
1230 mutex_lock(&root->fs_info->trans_mutex); 1241 mutex_lock(&root->fs_info->trans_mutex);
1231 if (cur_trans->in_commit) { 1242 if (cur_trans->in_commit) {
1232 cur_trans->use_count++; 1243 atomic_inc(&cur_trans->use_count);
1233 mutex_unlock(&root->fs_info->trans_mutex); 1244 mutex_unlock(&root->fs_info->trans_mutex);
1234 btrfs_end_transaction(trans, root); 1245 btrfs_end_transaction(trans, root);
1235 1246
@@ -1251,7 +1262,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1251 prev_trans = list_entry(cur_trans->list.prev, 1262 prev_trans = list_entry(cur_trans->list.prev,
1252 struct btrfs_transaction, list); 1263 struct btrfs_transaction, list);
1253 if (!prev_trans->commit_done) { 1264 if (!prev_trans->commit_done) {
1254 prev_trans->use_count++; 1265 atomic_inc(&prev_trans->use_count);
1255 mutex_unlock(&root->fs_info->trans_mutex); 1266 mutex_unlock(&root->fs_info->trans_mutex);
1256 1267
1257 wait_for_commit(root, prev_trans); 1268 wait_for_commit(root, prev_trans);
@@ -1292,14 +1303,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1292 TASK_UNINTERRUPTIBLE); 1303 TASK_UNINTERRUPTIBLE);
1293 1304
1294 smp_mb(); 1305 smp_mb();
1295 if (cur_trans->num_writers > 1) 1306 if (atomic_read(&cur_trans->num_writers) > 1)
1296 schedule_timeout(MAX_SCHEDULE_TIMEOUT); 1307 schedule_timeout(MAX_SCHEDULE_TIMEOUT);
1297 else if (should_grow) 1308 else if (should_grow)
1298 schedule_timeout(1); 1309 schedule_timeout(1);
1299 1310
1300 mutex_lock(&root->fs_info->trans_mutex); 1311 mutex_lock(&root->fs_info->trans_mutex);
1301 finish_wait(&cur_trans->writer_wait, &wait); 1312 finish_wait(&cur_trans->writer_wait, &wait);
1302 } while (cur_trans->num_writers > 1 || 1313 } while (atomic_read(&cur_trans->num_writers) > 1 ||
1303 (should_grow && cur_trans->num_joined != joined)); 1314 (should_grow && cur_trans->num_joined != joined));
1304 1315
1305 ret = create_pending_snapshots(trans, root->fs_info); 1316 ret = create_pending_snapshots(trans, root->fs_info);
@@ -1386,9 +1397,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1386 1397
1387 wake_up(&cur_trans->commit_wait); 1398 wake_up(&cur_trans->commit_wait);
1388 1399
1400 list_del_init(&cur_trans->list);
1389 put_transaction(cur_trans); 1401 put_transaction(cur_trans);
1390 put_transaction(cur_trans); 1402 put_transaction(cur_trans);
1391 1403
1404 trace_btrfs_transaction_commit(root);
1405
1392 mutex_unlock(&root->fs_info->trans_mutex); 1406 mutex_unlock(&root->fs_info->trans_mutex);
1393 1407
1394 if (current->journal_info == trans) 1408 if (current->journal_info == trans)
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 229a594cacd5..e441acc6c584 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -27,11 +27,11 @@ struct btrfs_transaction {
27 * total writers in this transaction, it must be zero before the 27 * total writers in this transaction, it must be zero before the
28 * transaction can end 28 * transaction can end
29 */ 29 */
30 unsigned long num_writers; 30 atomic_t num_writers;
31 31
32 unsigned long num_joined; 32 unsigned long num_joined;
33 int in_commit; 33 int in_commit;
34 int use_count; 34 atomic_t use_count;
35 int commit_done; 35 int commit_done;
36 int blocked; 36 int blocked;
37 struct list_head list; 37 struct list_head list;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index a4bbb854dfd2..c50271ad3157 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -799,12 +799,12 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
799 struct inode *dir; 799 struct inode *dir;
800 int ret; 800 int ret;
801 struct btrfs_inode_ref *ref; 801 struct btrfs_inode_ref *ref;
802 struct btrfs_dir_item *di;
803 struct inode *inode; 802 struct inode *inode;
804 char *name; 803 char *name;
805 int namelen; 804 int namelen;
806 unsigned long ref_ptr; 805 unsigned long ref_ptr;
807 unsigned long ref_end; 806 unsigned long ref_end;
807 int search_done = 0;
808 808
809 /* 809 /*
810 * it is possible that we didn't log all the parent directories 810 * it is possible that we didn't log all the parent directories
@@ -845,7 +845,10 @@ again:
845 * existing back reference, and we don't want to create 845 * existing back reference, and we don't want to create
846 * dangling pointers in the directory. 846 * dangling pointers in the directory.
847 */ 847 */
848conflict_again: 848
849 if (search_done)
850 goto insert;
851
849 ret = btrfs_search_slot(NULL, root, key, path, 0, 0); 852 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
850 if (ret == 0) { 853 if (ret == 0) {
851 char *victim_name; 854 char *victim_name;
@@ -886,37 +889,21 @@ conflict_again:
886 ret = btrfs_unlink_inode(trans, root, dir, 889 ret = btrfs_unlink_inode(trans, root, dir,
887 inode, victim_name, 890 inode, victim_name,
888 victim_name_len); 891 victim_name_len);
889 kfree(victim_name);
890 btrfs_release_path(root, path);
891 goto conflict_again;
892 } 892 }
893 kfree(victim_name); 893 kfree(victim_name);
894 ptr = (unsigned long)(victim_ref + 1) + victim_name_len; 894 ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
895 } 895 }
896 BUG_ON(ret); 896 BUG_ON(ret);
897 }
898 btrfs_release_path(root, path);
899
900 /* look for a conflicting sequence number */
901 di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
902 btrfs_inode_ref_index(eb, ref),
903 name, namelen, 0);
904 if (di && !IS_ERR(di)) {
905 ret = drop_one_dir_item(trans, root, path, dir, di);
906 BUG_ON(ret);
907 }
908 btrfs_release_path(root, path);
909 897
910 898 /*
 911 /* look for a conflicting name */ 899 * NOTE: we have searched the root tree and checked the
 912 di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, 900 * corresponding ref; there is no need to check it again.
913 name, namelen, 0); 901 */
914 if (di && !IS_ERR(di)) { 902 search_done = 1;
915 ret = drop_one_dir_item(trans, root, path, dir, di);
916 BUG_ON(ret);
917 } 903 }
918 btrfs_release_path(root, path); 904 btrfs_release_path(root, path);
919 905
906insert:
920 /* insert our name */ 907 /* insert our name */
921 ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, 908 ret = btrfs_add_link(trans, dir, inode, name, namelen, 0,
922 btrfs_inode_ref_index(eb, ref)); 909 btrfs_inode_ref_index(eb, ref));
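
Editor's note: the search_done flag replaces the old conflict_again loop — the expensive conflict scan runs at most once per ref item, and subsequent passes jump straight to the insert. A runnable sketch of that memoization shape, with the scan body stubbed out:

#include <stdio.h>

static int expensive_scan_count;

static void expensive_conflict_scan(void)
{
	expensive_scan_count++;	/* stands in for btrfs_search_slot() + unlink */
}

static void replay_refs(int nrefs)
{
	int search_done = 0;
	int i;

	for (i = 0; i < nrefs; i++) {
		if (!search_done) {
			expensive_conflict_scan();
			search_done = 1;	/* never rescan for this item */
		}
		/* ... insert ref i ... */
	}
}

int main(void)
{
	replay_refs(3);
	printf("scans: %d\n", expensive_scan_count);	/* prints 1 */
	return 0;
}
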
@@ -1286,6 +1273,8 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
1286 ptr_end = ptr + item_size; 1273 ptr_end = ptr + item_size;
1287 while (ptr < ptr_end) { 1274 while (ptr < ptr_end) {
1288 di = (struct btrfs_dir_item *)ptr; 1275 di = (struct btrfs_dir_item *)ptr;
1276 if (verify_dir_item(root, eb, di))
1277 return -EIO;
1289 name_len = btrfs_dir_name_len(eb, di); 1278 name_len = btrfs_dir_name_len(eb, di);
1290 ret = replay_one_name(trans, root, path, eb, di, key); 1279 ret = replay_one_name(trans, root, path, eb, di, key);
1291 BUG_ON(ret); 1280 BUG_ON(ret);
@@ -1412,6 +1401,11 @@ again:
1412 ptr_end = ptr + item_size; 1401 ptr_end = ptr + item_size;
1413 while (ptr < ptr_end) { 1402 while (ptr < ptr_end) {
1414 di = (struct btrfs_dir_item *)ptr; 1403 di = (struct btrfs_dir_item *)ptr;
1404 if (verify_dir_item(root, eb, di)) {
1405 ret = -EIO;
1406 goto out;
1407 }
1408
1415 name_len = btrfs_dir_name_len(eb, di); 1409 name_len = btrfs_dir_name_len(eb, di);
1416 name = kmalloc(name_len, GFP_NOFS); 1410 name = kmalloc(name_len, GFP_NOFS);
1417 if (!name) { 1411 if (!name) {
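
Editor's note: both replay loops now validate each btrfs_dir_item before trusting its embedded name length, returning -EIO on corruption instead of reading past the item. A simplified user-space stand-in for that bounds check (verify_dir_item itself checks more fields):

#include <errno.h>
#include <stddef.h>

struct dir_item {
	unsigned short name_len;	/* corruption-controlled on disk */
	char name[];
};

/* reject name lengths that would run past the containing item */
static int item_is_sane(const struct dir_item *di, size_t item_size)
{
	return sizeof(*di) + di->name_len <= item_size;
}

static int replay_item(const struct dir_item *di, size_t item_size)
{
	if (!item_is_sane(di, item_size))
		return -EIO;	/* corrupted metadata: fail, don't overread */
	/* ... safe to read di->name[0..name_len) ... */
	return 0;
}
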
@@ -1821,7 +1815,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
1821 int orig_level; 1815 int orig_level;
1822 1816
1823 path = btrfs_alloc_path(); 1817 path = btrfs_alloc_path();
1824 BUG_ON(!path); 1818 if (!path)
1819 return -ENOMEM;
1825 1820
1826 level = btrfs_header_level(log->node); 1821 level = btrfs_header_level(log->node);
1827 orig_level = level; 1822 orig_level = level;
@@ -3107,9 +3102,11 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
3107 .stage = 0, 3102 .stage = 0,
3108 }; 3103 };
3109 3104
3110 fs_info->log_root_recovering = 1;
3111 path = btrfs_alloc_path(); 3105 path = btrfs_alloc_path();
3112 BUG_ON(!path); 3106 if (!path)
3107 return -ENOMEM;
3108
3109 fs_info->log_root_recovering = 1;
3113 3110
3114 trans = btrfs_start_transaction(fs_info->tree_root, 0); 3111 trans = btrfs_start_transaction(fs_info->tree_root, 0);
3115 BUG_ON(IS_ERR(trans)); 3112 BUG_ON(IS_ERR(trans));
@@ -3117,7 +3114,8 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
3117 wc.trans = trans; 3114 wc.trans = trans;
3118 wc.pin = 1; 3115 wc.pin = 1;
3119 3116
3120 walk_log_tree(trans, log_root_tree, &wc); 3117 ret = walk_log_tree(trans, log_root_tree, &wc);
3118 BUG_ON(ret);
3121 3119
3122again: 3120again:
3123 key.objectid = BTRFS_TREE_LOG_OBJECTID; 3121 key.objectid = BTRFS_TREE_LOG_OBJECTID;
@@ -3141,8 +3139,7 @@ again:
3141 3139
3142 log = btrfs_read_fs_root_no_radix(log_root_tree, 3140 log = btrfs_read_fs_root_no_radix(log_root_tree,
3143 &found_key); 3141 &found_key);
3144 BUG_ON(!log); 3142 BUG_ON(IS_ERR(log));
3145
3146 3143
3147 tmp_key.objectid = found_key.offset; 3144 tmp_key.objectid = found_key.offset;
3148 tmp_key.type = BTRFS_ROOT_ITEM_KEY; 3145 tmp_key.type = BTRFS_ROOT_ITEM_KEY;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index dd13eb81ee40..309a57b9fc85 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -33,17 +33,6 @@
33#include "volumes.h" 33#include "volumes.h"
34#include "async-thread.h" 34#include "async-thread.h"
35 35
36struct map_lookup {
37 u64 type;
38 int io_align;
39 int io_width;
40 int stripe_len;
41 int sector_size;
42 int num_stripes;
43 int sub_stripes;
44 struct btrfs_bio_stripe stripes[];
45};
46
47static int init_first_rw_device(struct btrfs_trans_handle *trans, 36static int init_first_rw_device(struct btrfs_trans_handle *trans,
48 struct btrfs_root *root, 37 struct btrfs_root *root,
49 struct btrfs_device *device); 38 struct btrfs_device *device);
@@ -162,7 +151,6 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
162 struct bio *cur; 151 struct bio *cur;
163 int again = 0; 152 int again = 0;
164 unsigned long num_run; 153 unsigned long num_run;
165 unsigned long num_sync_run;
166 unsigned long batch_run = 0; 154 unsigned long batch_run = 0;
167 unsigned long limit; 155 unsigned long limit;
168 unsigned long last_waited = 0; 156 unsigned long last_waited = 0;
@@ -173,11 +161,6 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
173 limit = btrfs_async_submit_limit(fs_info); 161 limit = btrfs_async_submit_limit(fs_info);
174 limit = limit * 2 / 3; 162 limit = limit * 2 / 3;
175 163
176 /* we want to make sure that every time we switch from the sync
177 * list to the normal list, we unplug
178 */
179 num_sync_run = 0;
180
181loop: 164loop:
182 spin_lock(&device->io_lock); 165 spin_lock(&device->io_lock);
183 166
@@ -223,15 +206,6 @@ loop_lock:
223 206
224 spin_unlock(&device->io_lock); 207 spin_unlock(&device->io_lock);
225 208
226 /*
227 * if we're doing the regular priority list, make sure we unplug
228 * for any high prio bios we've sent down
229 */
230 if (pending_bios == &device->pending_bios && num_sync_run > 0) {
231 num_sync_run = 0;
232 blk_run_backing_dev(bdi, NULL);
233 }
234
235 while (pending) { 209 while (pending) {
236 210
237 rmb(); 211 rmb();
@@ -259,19 +233,11 @@ loop_lock:
259 233
260 BUG_ON(atomic_read(&cur->bi_cnt) == 0); 234 BUG_ON(atomic_read(&cur->bi_cnt) == 0);
261 235
262 if (cur->bi_rw & REQ_SYNC)
263 num_sync_run++;
264
265 submit_bio(cur->bi_rw, cur); 236 submit_bio(cur->bi_rw, cur);
266 num_run++; 237 num_run++;
267 batch_run++; 238 batch_run++;
268 if (need_resched()) { 239 if (need_resched())
269 if (num_sync_run) {
270 blk_run_backing_dev(bdi, NULL);
271 num_sync_run = 0;
272 }
273 cond_resched(); 240 cond_resched();
274 }
275 241
276 /* 242 /*
277 * we made progress, there is more work to do and the bdi 243 * we made progress, there is more work to do and the bdi
@@ -304,13 +270,8 @@ loop_lock:
304 * against it before looping 270 * against it before looping
305 */ 271 */
306 last_waited = ioc->last_waited; 272 last_waited = ioc->last_waited;
307 if (need_resched()) { 273 if (need_resched())
308 if (num_sync_run) {
309 blk_run_backing_dev(bdi, NULL);
310 num_sync_run = 0;
311 }
312 cond_resched(); 274 cond_resched();
313 }
314 continue; 275 continue;
315 } 276 }
316 spin_lock(&device->io_lock); 277 spin_lock(&device->io_lock);
@@ -323,22 +284,6 @@ loop_lock:
323 } 284 }
324 } 285 }
325 286
326 if (num_sync_run) {
327 num_sync_run = 0;
328 blk_run_backing_dev(bdi, NULL);
329 }
330 /*
331 * IO has already been through a long path to get here. Checksumming,
332 * async helper threads, perhaps compression. We've done a pretty
333 * good job of collecting a batch of IO and should just unplug
334 * the device right away.
335 *
336 * This will help anyone who is waiting on the IO, they might have
337 * already unplugged, but managed to do so before the bio they
338 * cared about found its way down here.
339 */
340 blk_run_backing_dev(bdi, NULL);
341
342 cond_resched(); 287 cond_resched();
343 if (again) 288 if (again)
344 goto loop; 289 goto loop;
@@ -1923,6 +1868,8 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
1923 1868
1924 BUG_ON(ret); 1869 BUG_ON(ret);
1925 1870
1871 trace_btrfs_chunk_free(root, map, chunk_offset, em->len);
1872
1926 if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { 1873 if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
1927 ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); 1874 ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset);
1928 BUG_ON(ret); 1875 BUG_ON(ret);
@@ -2650,6 +2597,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2650 *num_bytes = chunk_bytes_by_type(type, calc_size, 2597 *num_bytes = chunk_bytes_by_type(type, calc_size,
2651 map->num_stripes, sub_stripes); 2598 map->num_stripes, sub_stripes);
2652 2599
2600 trace_btrfs_chunk_alloc(info->chunk_root, map, start, *num_bytes);
2601
2653 em = alloc_extent_map(GFP_NOFS); 2602 em = alloc_extent_map(GFP_NOFS);
2654 if (!em) { 2603 if (!em) {
2655 ret = -ENOMEM; 2604 ret = -ENOMEM;
@@ -2758,6 +2707,7 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
2758 item_size); 2707 item_size);
2759 BUG_ON(ret); 2708 BUG_ON(ret);
2760 } 2709 }
2710
2761 kfree(chunk); 2711 kfree(chunk);
2762 return 0; 2712 return 0;
2763} 2713}
@@ -2955,14 +2905,17 @@ static int find_live_mirror(struct map_lookup *map, int first, int num,
2955static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, 2905static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
2956 u64 logical, u64 *length, 2906 u64 logical, u64 *length,
2957 struct btrfs_multi_bio **multi_ret, 2907 struct btrfs_multi_bio **multi_ret,
2958 int mirror_num, struct page *unplug_page) 2908 int mirror_num)
2959{ 2909{
2960 struct extent_map *em; 2910 struct extent_map *em;
2961 struct map_lookup *map; 2911 struct map_lookup *map;
2962 struct extent_map_tree *em_tree = &map_tree->map_tree; 2912 struct extent_map_tree *em_tree = &map_tree->map_tree;
2963 u64 offset; 2913 u64 offset;
2964 u64 stripe_offset; 2914 u64 stripe_offset;
2915 u64 stripe_end_offset;
2965 u64 stripe_nr; 2916 u64 stripe_nr;
2917 u64 stripe_nr_orig;
2918 u64 stripe_nr_end;
2966 int stripes_allocated = 8; 2919 int stripes_allocated = 8;
2967 int stripes_required = 1; 2920 int stripes_required = 1;
2968 int stripe_index; 2921 int stripe_index;
@@ -2971,7 +2924,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
2971 int max_errors = 0; 2924 int max_errors = 0;
2972 struct btrfs_multi_bio *multi = NULL; 2925 struct btrfs_multi_bio *multi = NULL;
2973 2926
2974 if (multi_ret && !(rw & REQ_WRITE)) 2927 if (multi_ret && !(rw & (REQ_WRITE | REQ_DISCARD)))
2975 stripes_allocated = 1; 2928 stripes_allocated = 1;
2976again: 2929again:
2977 if (multi_ret) { 2930 if (multi_ret) {
@@ -2987,11 +2940,6 @@ again:
2987 em = lookup_extent_mapping(em_tree, logical, *length); 2940 em = lookup_extent_mapping(em_tree, logical, *length);
2988 read_unlock(&em_tree->lock); 2941 read_unlock(&em_tree->lock);
2989 2942
2990 if (!em && unplug_page) {
2991 kfree(multi);
2992 return 0;
2993 }
2994
2995 if (!em) { 2943 if (!em) {
2996 printk(KERN_CRIT "unable to find logical %llu len %llu\n", 2944 printk(KERN_CRIT "unable to find logical %llu len %llu\n",
2997 (unsigned long long)logical, 2945 (unsigned long long)logical,
@@ -3017,7 +2965,15 @@ again:
3017 max_errors = 1; 2965 max_errors = 1;
3018 } 2966 }
3019 } 2967 }
3020 if (multi_ret && (rw & REQ_WRITE) && 2968 if (rw & REQ_DISCARD) {
2969 if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
2970 BTRFS_BLOCK_GROUP_RAID1 |
2971 BTRFS_BLOCK_GROUP_DUP |
2972 BTRFS_BLOCK_GROUP_RAID10)) {
2973 stripes_required = map->num_stripes;
2974 }
2975 }
2976 if (multi_ret && (rw & (REQ_WRITE | REQ_DISCARD)) &&
3021 stripes_allocated < stripes_required) { 2977 stripes_allocated < stripes_required) {
3022 stripes_allocated = map->num_stripes; 2978 stripes_allocated = map->num_stripes;
3023 free_extent_map(em); 2979 free_extent_map(em);
@@ -3037,23 +2993,37 @@ again:
3037 /* stripe_offset is the offset of this block in its stripe*/ 2993 /* stripe_offset is the offset of this block in its stripe*/
3038 stripe_offset = offset - stripe_offset; 2994 stripe_offset = offset - stripe_offset;
3039 2995
3040 if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | 2996 if (rw & REQ_DISCARD)
3041 BTRFS_BLOCK_GROUP_RAID10 | 2997 *length = min_t(u64, em->len - offset, *length);
3042 BTRFS_BLOCK_GROUP_DUP)) { 2998 else if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
2999 BTRFS_BLOCK_GROUP_RAID1 |
3000 BTRFS_BLOCK_GROUP_RAID10 |
3001 BTRFS_BLOCK_GROUP_DUP)) {
3043 /* we limit the length of each bio to what fits in a stripe */ 3002 /* we limit the length of each bio to what fits in a stripe */
3044 *length = min_t(u64, em->len - offset, 3003 *length = min_t(u64, em->len - offset,
3045 map->stripe_len - stripe_offset); 3004 map->stripe_len - stripe_offset);
3046 } else { 3005 } else {
3047 *length = em->len - offset; 3006 *length = em->len - offset;
3048 } 3007 }
3049 3008
3050 if (!multi_ret && !unplug_page) 3009 if (!multi_ret)
3051 goto out; 3010 goto out;
3052 3011
3053 num_stripes = 1; 3012 num_stripes = 1;
3054 stripe_index = 0; 3013 stripe_index = 0;
3055 if (map->type & BTRFS_BLOCK_GROUP_RAID1) { 3014 stripe_nr_orig = stripe_nr;
3056 if (unplug_page || (rw & REQ_WRITE)) 3015 stripe_nr_end = (offset + *length + map->stripe_len - 1) &
3016 (~(map->stripe_len - 1));
3017 do_div(stripe_nr_end, map->stripe_len);
3018 stripe_end_offset = stripe_nr_end * map->stripe_len -
3019 (offset + *length);
3020 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
3021 if (rw & REQ_DISCARD)
3022 num_stripes = min_t(u64, map->num_stripes,
3023 stripe_nr_end - stripe_nr_orig);
3024 stripe_index = do_div(stripe_nr, map->num_stripes);
3025 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
3026 if (rw & (REQ_WRITE | REQ_DISCARD))
3057 num_stripes = map->num_stripes; 3027 num_stripes = map->num_stripes;
3058 else if (mirror_num) 3028 else if (mirror_num)
3059 stripe_index = mirror_num - 1; 3029 stripe_index = mirror_num - 1;
@@ -3064,7 +3034,7 @@ again:
3064 } 3034 }
3065 3035
3066 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { 3036 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
3067 if (rw & REQ_WRITE) 3037 if (rw & (REQ_WRITE | REQ_DISCARD))
3068 num_stripes = map->num_stripes; 3038 num_stripes = map->num_stripes;
3069 else if (mirror_num) 3039 else if (mirror_num)
3070 stripe_index = mirror_num - 1; 3040 stripe_index = mirror_num - 1;
@@ -3075,8 +3045,12 @@ again:
3075 stripe_index = do_div(stripe_nr, factor); 3045 stripe_index = do_div(stripe_nr, factor);
3076 stripe_index *= map->sub_stripes; 3046 stripe_index *= map->sub_stripes;
3077 3047
3078 if (unplug_page || (rw & REQ_WRITE)) 3048 if (rw & REQ_WRITE)
3079 num_stripes = map->sub_stripes; 3049 num_stripes = map->sub_stripes;
3050 else if (rw & REQ_DISCARD)
3051 num_stripes = min_t(u64, map->sub_stripes *
3052 (stripe_nr_end - stripe_nr_orig),
3053 map->num_stripes);
3080 else if (mirror_num) 3054 else if (mirror_num)
3081 stripe_index += mirror_num - 1; 3055 stripe_index += mirror_num - 1;
3082 else { 3056 else {
@@ -3094,24 +3068,101 @@ again:
3094 } 3068 }
3095 BUG_ON(stripe_index >= map->num_stripes); 3069 BUG_ON(stripe_index >= map->num_stripes);
3096 3070
3097 for (i = 0; i < num_stripes; i++) { 3071 if (rw & REQ_DISCARD) {
3098 if (unplug_page) { 3072 for (i = 0; i < num_stripes; i++) {
3099 struct btrfs_device *device;
3100 struct backing_dev_info *bdi;
3101
3102 device = map->stripes[stripe_index].dev;
3103 if (device->bdev) {
3104 bdi = blk_get_backing_dev_info(device->bdev);
3105 if (bdi->unplug_io_fn)
3106 bdi->unplug_io_fn(bdi, unplug_page);
3107 }
3108 } else {
3109 multi->stripes[i].physical = 3073 multi->stripes[i].physical =
3110 map->stripes[stripe_index].physical + 3074 map->stripes[stripe_index].physical +
3111 stripe_offset + stripe_nr * map->stripe_len; 3075 stripe_offset + stripe_nr * map->stripe_len;
3112 multi->stripes[i].dev = map->stripes[stripe_index].dev; 3076 multi->stripes[i].dev = map->stripes[stripe_index].dev;
3077
3078 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
3079 u64 stripes;
3080 u32 last_stripe = 0;
3081 int j;
3082
3083 div_u64_rem(stripe_nr_end - 1,
3084 map->num_stripes,
3085 &last_stripe);
3086
3087 for (j = 0; j < map->num_stripes; j++) {
3088 u32 test;
3089
3090 div_u64_rem(stripe_nr_end - 1 - j,
3091 map->num_stripes, &test);
3092 if (test == stripe_index)
3093 break;
3094 }
3095 stripes = stripe_nr_end - 1 - j;
3096 do_div(stripes, map->num_stripes);
3097 multi->stripes[i].length = map->stripe_len *
3098 (stripes - stripe_nr + 1);
3099
3100 if (i == 0) {
3101 multi->stripes[i].length -=
3102 stripe_offset;
3103 stripe_offset = 0;
3104 }
3105 if (stripe_index == last_stripe)
3106 multi->stripes[i].length -=
3107 stripe_end_offset;
3108 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
3109 u64 stripes;
3110 int j;
3111 int factor = map->num_stripes /
3112 map->sub_stripes;
3113 u32 last_stripe = 0;
3114
3115 div_u64_rem(stripe_nr_end - 1,
3116 factor, &last_stripe);
3117 last_stripe *= map->sub_stripes;
3118
3119 for (j = 0; j < factor; j++) {
3120 u32 test;
3121
3122 div_u64_rem(stripe_nr_end - 1 - j,
3123 factor, &test);
3124
3125 if (test ==
3126 stripe_index / map->sub_stripes)
3127 break;
3128 }
3129 stripes = stripe_nr_end - 1 - j;
3130 do_div(stripes, factor);
3131 multi->stripes[i].length = map->stripe_len *
3132 (stripes - stripe_nr + 1);
3133
3134 if (i < map->sub_stripes) {
3135 multi->stripes[i].length -=
3136 stripe_offset;
3137 if (i == map->sub_stripes - 1)
3138 stripe_offset = 0;
3139 }
3140 if (stripe_index >= last_stripe &&
3141 stripe_index <= (last_stripe +
3142 map->sub_stripes - 1)) {
3143 multi->stripes[i].length -=
3144 stripe_end_offset;
3145 }
3146 } else
3147 multi->stripes[i].length = *length;
3148
3149 stripe_index++;
3150 if (stripe_index == map->num_stripes) {
3151 /* This could only happen for RAID0/10 */
3152 stripe_index = 0;
3153 stripe_nr++;
3154 }
3155 }
3156 } else {
3157 for (i = 0; i < num_stripes; i++) {
3158 multi->stripes[i].physical =
3159 map->stripes[stripe_index].physical +
3160 stripe_offset +
3161 stripe_nr * map->stripe_len;
3162 multi->stripes[i].dev =
3163 map->stripes[stripe_index].dev;
3164 stripe_index++;
3113 } 3165 }
3114 stripe_index++;
3115 } 3166 }
3116 if (multi_ret) { 3167 if (multi_ret) {
3117 *multi_ret = multi; 3168 *multi_ret = multi;
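
Editor's note: the discard branch splits a byte range across RAID0/RAID10 stripes, trimming the first unit by stripe_offset and the last by stripe_end_offset. The closed-form arithmetic above is easiest to sanity-check against a brute-force model; here is a self-contained simulation of the RAID0 case that totals bytes per device, with arbitrary example geometry:

#include <stdint.h>
#include <stdio.h>

/* Walk the range one stripe unit at a time and total the bytes each
 * device must trim; the kernel computes the same totals in closed form. */
static void raid0_discard(uint64_t offset, uint64_t length,
			  uint64_t stripe_len, int num_stripes,
			  uint64_t *per_dev)
{
	uint64_t end = offset + length;

	for (int i = 0; i < num_stripes; i++)
		per_dev[i] = 0;

	while (offset < end) {
		uint64_t unit = offset / stripe_len;	/* global stripe nr */
		uint64_t unit_end = (unit + 1) * stripe_len;
		uint64_t chunk = (unit_end < end ? unit_end : end) - offset;

		per_dev[unit % num_stripes] += chunk;
		offset += chunk;
	}
}

int main(void)
{
	uint64_t per_dev[4];

	/* discard 300KiB starting 16KiB into a 4-disk, 64KiB-stripe array */
	raid0_discard(16 << 10, 300 << 10, 64 << 10, 4, per_dev);
	for (int i = 0; i < 4; i++)
		printf("dev%d: %llu KiB\n", i,
		       (unsigned long long)(per_dev[i] >> 10));
	return 0;
}
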
@@ -3128,7 +3179,7 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
3128 struct btrfs_multi_bio **multi_ret, int mirror_num) 3179 struct btrfs_multi_bio **multi_ret, int mirror_num)
3129{ 3180{
3130 return __btrfs_map_block(map_tree, rw, logical, length, multi_ret, 3181 return __btrfs_map_block(map_tree, rw, logical, length, multi_ret,
3131 mirror_num, NULL); 3182 mirror_num);
3132} 3183}
3133 3184
3134int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, 3185int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
@@ -3196,14 +3247,6 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
3196 return 0; 3247 return 0;
3197} 3248}
3198 3249
3199int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
3200 u64 logical, struct page *page)
3201{
3202 u64 length = PAGE_CACHE_SIZE;
3203 return __btrfs_map_block(map_tree, READ, logical, &length,
3204 NULL, 0, page);
3205}
3206
3207static void end_bio_multi_stripe(struct bio *bio, int err) 3250static void end_bio_multi_stripe(struct bio *bio, int err)
3208{ 3251{
3209 struct btrfs_multi_bio *multi = bio->bi_private; 3252 struct btrfs_multi_bio *multi = bio->bi_private;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 7fb59d45fe8c..cc2eadaf7a27 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -126,6 +126,7 @@ struct btrfs_fs_devices {
126struct btrfs_bio_stripe { 126struct btrfs_bio_stripe {
127 struct btrfs_device *dev; 127 struct btrfs_device *dev;
128 u64 physical; 128 u64 physical;
129 u64 length; /* only used for discard mappings */
129}; 130};
130 131
131struct btrfs_multi_bio { 132struct btrfs_multi_bio {
@@ -145,6 +146,17 @@ struct btrfs_device_info {
145 u64 max_avail; 146 u64 max_avail;
146}; 147};
147 148
149struct map_lookup {
150 u64 type;
151 int io_align;
152 int io_width;
153 int stripe_len;
154 int sector_size;
155 int num_stripes;
156 int sub_stripes;
157 struct btrfs_bio_stripe stripes[];
158};
159
148/* Used to sort the devices by max_avail(descending sort) */ 160/* Used to sort the devices by max_avail(descending sort) */
149int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2); 161int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2);
150 162
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index d779cefcfd7d..cfd660550ded 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -180,11 +180,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
180 struct btrfs_path *path; 180 struct btrfs_path *path;
181 struct extent_buffer *leaf; 181 struct extent_buffer *leaf;
182 struct btrfs_dir_item *di; 182 struct btrfs_dir_item *di;
183 int ret = 0, slot, advance; 183 int ret = 0, slot;
184 size_t total_size = 0, size_left = size; 184 size_t total_size = 0, size_left = size;
185 unsigned long name_ptr; 185 unsigned long name_ptr;
186 size_t name_len; 186 size_t name_len;
187 u32 nritems;
188 187
189 /* 188 /*
190 * ok we want all objects associated with this id. 189 * ok we want all objects associated with this id.
@@ -204,34 +203,24 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
204 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 203 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
205 if (ret < 0) 204 if (ret < 0)
206 goto err; 205 goto err;
207 advance = 0; 206
208 while (1) { 207 while (1) {
209 leaf = path->nodes[0]; 208 leaf = path->nodes[0];
210 nritems = btrfs_header_nritems(leaf);
211 slot = path->slots[0]; 209 slot = path->slots[0];
212 210
213 /* this is where we start walking through the path */ 211 /* this is where we start walking through the path */
214 if (advance || slot >= nritems) { 212 if (slot >= btrfs_header_nritems(leaf)) {
215 /* 213 /*
216 * if we've reached the last slot in this leaf we need 214 * if we've reached the last slot in this leaf we need
217 * to go to the next leaf and reset everything 215 * to go to the next leaf and reset everything
218 */ 216 */
219 if (slot >= nritems-1) { 217 ret = btrfs_next_leaf(root, path);
220 ret = btrfs_next_leaf(root, path); 218 if (ret < 0)
221 if (ret) 219 goto err;
222 break; 220 else if (ret > 0)
223 leaf = path->nodes[0]; 221 break;
224 nritems = btrfs_header_nritems(leaf); 222 continue;
225 slot = path->slots[0];
226 } else {
227 /*
228 * just walking through the slots on this leaf
229 */
230 slot++;
231 path->slots[0]++;
232 }
233 } 223 }
234 advance = 1;
235 224
236 btrfs_item_key_to_cpu(leaf, &found_key, slot); 225 btrfs_item_key_to_cpu(leaf, &found_key, slot);
237 226
@@ -242,13 +231,15 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
242 break; 231 break;
243 232
244 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); 233 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
234 if (verify_dir_item(root, leaf, di))
235 continue;
245 236
246 name_len = btrfs_dir_name_len(leaf, di); 237 name_len = btrfs_dir_name_len(leaf, di);
247 total_size += name_len + 1; 238 total_size += name_len + 1;
248 239
249 /* we are just looking for how big our buffer needs to be */ 240 /* we are just looking for how big our buffer needs to be */
250 if (!size) 241 if (!size)
251 continue; 242 goto next;
252 243
253 if (!buffer || (name_len + 1) > size_left) { 244 if (!buffer || (name_len + 1) > size_left) {
254 ret = -ERANGE; 245 ret = -ERANGE;
@@ -261,6 +252,8 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
261 252
262 size_left -= name_len + 1; 253 size_left -= name_len + 1;
263 buffer += name_len + 1; 254 buffer += name_len + 1;
255next:
256 path->slots[0]++;
264 } 257 }
265 ret = total_size; 258 ret = total_size;
266 259
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index f5ec2d44150d..faccd47c6c46 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -57,7 +57,8 @@ static struct list_head *zlib_alloc_workspace(void)
57 if (!workspace) 57 if (!workspace)
58 return ERR_PTR(-ENOMEM); 58 return ERR_PTR(-ENOMEM);
59 59
60 workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); 60 workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize(
61 MAX_WBITS, MAX_MEM_LEVEL));
61 workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); 62 workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
62 workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); 63 workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
63 if (!workspace->def_strm.workspace || 64 if (!workspace->def_strm.workspace ||
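
Editor's note: zlib_deflate_workspacesize() gained window-bits and memory-level parameters in this kernel cycle so callers can size the workspace for the parameters they actually pass to zlib_deflateInit2(); supplying MAX_WBITS and MAX_MEM_LEVEL, as btrfs does here, preserves the historical worst-case size. A kernel-style sketch of the call (kernel context assumed; not buildable in user space):

#include <linux/zlib.h>
#include <linux/vmalloc.h>

static void *alloc_deflate_workspace(void)
{
	/* worst-case sizing: matches the old parameterless behavior */
	return vmalloc(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL));
}
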
diff --git a/fs/buffer.c b/fs/buffer.c
index 2219a76e2caf..a08bb8e61c6f 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -54,23 +54,15 @@ init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
54} 54}
55EXPORT_SYMBOL(init_buffer); 55EXPORT_SYMBOL(init_buffer);
56 56
57static int sync_buffer(void *word) 57static int sleep_on_buffer(void *word)
58{ 58{
59 struct block_device *bd;
60 struct buffer_head *bh
61 = container_of(word, struct buffer_head, b_state);
62
63 smp_mb();
64 bd = bh->b_bdev;
65 if (bd)
66 blk_run_address_space(bd->bd_inode->i_mapping);
67 io_schedule(); 59 io_schedule();
68 return 0; 60 return 0;
69} 61}
70 62
71void __lock_buffer(struct buffer_head *bh) 63void __lock_buffer(struct buffer_head *bh)
72{ 64{
73 wait_on_bit_lock(&bh->b_state, BH_Lock, sync_buffer, 65 wait_on_bit_lock(&bh->b_state, BH_Lock, sleep_on_buffer,
74 TASK_UNINTERRUPTIBLE); 66 TASK_UNINTERRUPTIBLE);
75} 67}
76EXPORT_SYMBOL(__lock_buffer); 68EXPORT_SYMBOL(__lock_buffer);
@@ -90,7 +82,7 @@ EXPORT_SYMBOL(unlock_buffer);
90 */ 82 */
91void __wait_on_buffer(struct buffer_head * bh) 83void __wait_on_buffer(struct buffer_head * bh)
92{ 84{
93 wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE); 85 wait_on_bit(&bh->b_state, BH_Lock, sleep_on_buffer, TASK_UNINTERRUPTIBLE);
94} 86}
95EXPORT_SYMBOL(__wait_on_buffer); 87EXPORT_SYMBOL(__wait_on_buffer);
96 88
@@ -749,10 +741,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
749{ 741{
750 struct buffer_head *bh; 742 struct buffer_head *bh;
751 struct list_head tmp; 743 struct list_head tmp;
752 struct address_space *mapping, *prev_mapping = NULL; 744 struct address_space *mapping;
753 int err = 0, err2; 745 int err = 0, err2;
746 struct blk_plug plug;
754 747
755 INIT_LIST_HEAD(&tmp); 748 INIT_LIST_HEAD(&tmp);
749 blk_start_plug(&plug);
756 750
757 spin_lock(lock); 751 spin_lock(lock);
758 while (!list_empty(list)) { 752 while (!list_empty(list)) {
@@ -775,7 +769,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
775 * still in flight on potentially older 769 * still in flight on potentially older
776 * contents. 770 * contents.
777 */ 771 */
778 write_dirty_buffer(bh, WRITE_SYNC_PLUG); 772 write_dirty_buffer(bh, WRITE_SYNC);
779 773
780 /* 774 /*
781 * Kick off IO for the previous mapping. Note 775 * Kick off IO for the previous mapping. Note
@@ -783,16 +777,16 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
783 * wait_on_buffer() will do that for us 777 * wait_on_buffer() will do that for us
 784 * through sync_buffer(). 778 * through sleep_on_buffer().
785 */ 779 */
786 if (prev_mapping && prev_mapping != mapping)
787 blk_run_address_space(prev_mapping);
788 prev_mapping = mapping;
789
790 brelse(bh); 780 brelse(bh);
791 spin_lock(lock); 781 spin_lock(lock);
792 } 782 }
793 } 783 }
794 } 784 }
795 785
786 spin_unlock(lock);
787 blk_finish_plug(&plug);
788 spin_lock(lock);
789
796 while (!list_empty(&tmp)) { 790 while (!list_empty(&tmp)) {
797 bh = BH_ENTRY(tmp.prev); 791 bh = BH_ENTRY(tmp.prev);
798 get_bh(bh); 792 get_bh(bh);
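
Editor's note: fsync_buffers_list now batches submissions under an on-stack blk_plug in place of the removed per-mapping unplug calls, and it drops the spinlock around blk_finish_plug() because flushing the plug may block. A kernel-style sketch of the pattern (kernel context assumed):

#include <linux/blkdev.h>
#include <linux/spinlock.h>

static void write_batch(spinlock_t *lock)
{
	struct blk_plug plug;

	blk_start_plug(&plug);	/* queue I/O locally on this task */

	spin_lock(lock);
	/* ... submit many buffers with write_dirty_buffer() ... */
	spin_unlock(lock);

	blk_finish_plug(&plug);	/* flush the batch; may block, so no lock */

	spin_lock(lock);
	/* ... wait on the submitted buffers ... */
	spin_unlock(lock);
}
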
@@ -1144,7 +1138,7 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
1144 * inode list. 1138 * inode list.
1145 * 1139 *
1146 * mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock, 1140 * mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock,
1147 * mapping->tree_lock and the global inode_lock. 1141 * mapping->tree_lock and mapping->host->i_lock.
1148 */ 1142 */
1149void mark_buffer_dirty(struct buffer_head *bh) 1143void mark_buffer_dirty(struct buffer_head *bh)
1150{ 1144{
@@ -1614,14 +1608,8 @@ EXPORT_SYMBOL(unmap_underlying_metadata);
1614 * prevents this contention from occurring. 1608 * prevents this contention from occurring.
1615 * 1609 *
1616 * If block_write_full_page() is called with wbc->sync_mode == 1610 * If block_write_full_page() is called with wbc->sync_mode ==
1617 * WB_SYNC_ALL, the writes are posted using WRITE_SYNC_PLUG; this 1611 * WB_SYNC_ALL, the writes are posted using WRITE_SYNC; this
1618 * causes the writes to be flagged as synchronous writes, but the 1612 * causes the writes to be flagged as synchronous writes.
1619 * block device queue will NOT be unplugged, since usually many pages
1620 * will be pushed to the out before the higher-level caller actually
1621 * waits for the writes to be completed. The various wait functions,
1622 * such as wait_on_writeback_range() will ultimately call sync_page()
1623 * which will ultimately call blk_run_backing_dev(), which will end up
1624 * unplugging the device queue.
1625 */ 1613 */
1626static int __block_write_full_page(struct inode *inode, struct page *page, 1614static int __block_write_full_page(struct inode *inode, struct page *page,
1627 get_block_t *get_block, struct writeback_control *wbc, 1615 get_block_t *get_block, struct writeback_control *wbc,
@@ -1634,7 +1622,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
1634 const unsigned blocksize = 1 << inode->i_blkbits; 1622 const unsigned blocksize = 1 << inode->i_blkbits;
1635 int nr_underway = 0; 1623 int nr_underway = 0;
1636 int write_op = (wbc->sync_mode == WB_SYNC_ALL ? 1624 int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
1637 WRITE_SYNC_PLUG : WRITE); 1625 WRITE_SYNC : WRITE);
1638 1626
1639 BUG_ON(!PageLocked(page)); 1627 BUG_ON(!PageLocked(page));
1640 1628
@@ -3138,17 +3126,6 @@ out:
3138} 3126}
3139EXPORT_SYMBOL(try_to_free_buffers); 3127EXPORT_SYMBOL(try_to_free_buffers);
3140 3128
3141void block_sync_page(struct page *page)
3142{
3143 struct address_space *mapping;
3144
3145 smp_mb();
3146 mapping = page_mapping(page);
3147 if (mapping)
3148 blk_run_backing_dev(mapping->backing_dev_info, page);
3149}
3150EXPORT_SYMBOL(block_sync_page);
3151
3152/* 3129/*
3153 * There are no bdflush tunables left. But distributions are 3130 * There are no bdflush tunables left. But distributions are
3154 * still running obsolete flush daemons, so we terminate them here. 3131 * still running obsolete flush daemons, so we terminate them here.
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 37fe101a4e0d..1064805e653b 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -197,7 +197,7 @@ struct fscache_object *cachefiles_grab_object(struct fscache_object *_object)
197} 197}
198 198
199/* 199/*
 200 * update the auxilliary data for an object object on disk 200 * update the auxiliary data for an object on disk
201 */ 201 */
202static void cachefiles_update_object(struct fscache_object *_object) 202static void cachefiles_update_object(struct fscache_object *_object)
203{ 203{
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 561438b6a50c..e159c529fd2b 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -24,7 +24,7 @@
24 * context needs to be associated with the osd write during writeback. 24 * context needs to be associated with the osd write during writeback.
25 * 25 *
26 * Similarly, struct ceph_inode_info maintains a set of counters to 26 * Similarly, struct ceph_inode_info maintains a set of counters to
27 * count dirty pages on the inode. In the absense of snapshots, 27 * count dirty pages on the inode. In the absence of snapshots,
28 * i_wrbuffer_ref == i_wrbuffer_ref_head == the dirty page count. 28 * i_wrbuffer_ref == i_wrbuffer_ref_head == the dirty page count.
29 * 29 *
30 * When a snapshot is taken (that is, when the client receives 30 * When a snapshot is taken (that is, when the client receives
@@ -92,7 +92,7 @@ static int ceph_set_page_dirty(struct page *page)
92 ci->i_head_snapc = ceph_get_snap_context(snapc); 92 ci->i_head_snapc = ceph_get_snap_context(snapc);
93 ++ci->i_wrbuffer_ref_head; 93 ++ci->i_wrbuffer_ref_head;
94 if (ci->i_wrbuffer_ref == 0) 94 if (ci->i_wrbuffer_ref == 0)
95 igrab(inode); 95 ihold(inode);
96 ++ci->i_wrbuffer_ref; 96 ++ci->i_wrbuffer_ref;
97 dout("%p set_page_dirty %p idx %lu head %d/%d -> %d/%d " 97 dout("%p set_page_dirty %p idx %lu head %d/%d -> %d/%d "
98 "snapc %p seq %lld (%d snaps)\n", 98 "snapc %p seq %lld (%d snaps)\n",
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 6b61ded701e1..5323c330bbf3 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -765,7 +765,7 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
765 if (touch) { 765 if (touch) {
766 struct rb_node *q; 766 struct rb_node *q;
767 767
768 /* touch this + preceeding caps */ 768 /* touch this + preceding caps */
769 __touch_cap(cap); 769 __touch_cap(cap);
770 for (q = rb_first(&ci->i_caps); q != p; 770 for (q = rb_first(&ci->i_caps); q != p;
771 q = rb_next(q)) { 771 q = rb_next(q)) {
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 08f65faac112..0dba6915712b 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -210,8 +210,6 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
210 if (!fsc->debugfs_congestion_kb) 210 if (!fsc->debugfs_congestion_kb)
211 goto out; 211 goto out;
212 212
213 dout("a\n");
214
215 snprintf(name, sizeof(name), "../../bdi/%s", 213 snprintf(name, sizeof(name), "../../bdi/%s",
216 dev_name(fsc->backing_dev_info.dev)); 214 dev_name(fsc->backing_dev_info.dev));
217 fsc->debugfs_bdi = 215 fsc->debugfs_bdi =
@@ -221,7 +219,6 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
221 if (!fsc->debugfs_bdi) 219 if (!fsc->debugfs_bdi)
222 goto out; 220 goto out;
223 221
224 dout("b\n");
225 fsc->debugfs_mdsmap = debugfs_create_file("mdsmap", 222 fsc->debugfs_mdsmap = debugfs_create_file("mdsmap",
226 0600, 223 0600,
227 fsc->client->debugfs_dir, 224 fsc->client->debugfs_dir,
@@ -230,7 +227,6 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
230 if (!fsc->debugfs_mdsmap) 227 if (!fsc->debugfs_mdsmap)
231 goto out; 228 goto out;
232 229
233 dout("ca\n");
234 fsc->debugfs_mdsc = debugfs_create_file("mdsc", 230 fsc->debugfs_mdsc = debugfs_create_file("mdsc",
235 0600, 231 0600,
236 fsc->client->debugfs_dir, 232 fsc->client->debugfs_dir,
@@ -239,7 +235,6 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
239 if (!fsc->debugfs_mdsc) 235 if (!fsc->debugfs_mdsc)
240 goto out; 236 goto out;
241 237
242 dout("da\n");
243 fsc->debugfs_caps = debugfs_create_file("caps", 238 fsc->debugfs_caps = debugfs_create_file("caps",
244 0400, 239 0400,
245 fsc->client->debugfs_dir, 240 fsc->client->debugfs_dir,
@@ -248,7 +243,6 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
248 if (!fsc->debugfs_caps) 243 if (!fsc->debugfs_caps)
249 goto out; 244 goto out;
250 245
251 dout("ea\n");
252 fsc->debugfs_dentry_lru = debugfs_create_file("dentry_lru", 246 fsc->debugfs_dentry_lru = debugfs_create_file("dentry_lru",
253 0600, 247 0600,
254 fsc->client->debugfs_dir, 248 fsc->client->debugfs_dir,
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index ebafa65a29b6..1a867a3601ae 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -161,7 +161,7 @@ more:
 	filp->f_pos = di->offset;
 	err = filldir(dirent, dentry->d_name.name,
 		      dentry->d_name.len, di->offset,
-		      dentry->d_inode->i_ino,
+		      ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino),
 		      dentry->d_inode->i_mode >> 12);
 
 	if (last) {
@@ -245,15 +245,17 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
 
 		dout("readdir off 0 -> '.'\n");
 		if (filldir(dirent, ".", 1, ceph_make_fpos(0, 0),
-			    inode->i_ino, inode->i_mode >> 12) < 0)
+			    ceph_translate_ino(inode->i_sb, inode->i_ino),
+			    inode->i_mode >> 12) < 0)
 			return 0;
 		filp->f_pos = 1;
 		off = 1;
 	}
 	if (filp->f_pos == 1) {
+		ino_t ino = filp->f_dentry->d_parent->d_inode->i_ino;
 		dout("readdir off 1 -> '..'\n");
 		if (filldir(dirent, "..", 2, ceph_make_fpos(0, 1),
-			    filp->f_dentry->d_parent->d_inode->i_ino,
+			    ceph_translate_ino(inode->i_sb, ino),
 			    inode->i_mode >> 12) < 0)
 			return 0;
 		filp->f_pos = 2;
@@ -377,7 +379,8 @@ more:
 			if (filldir(dirent,
 				    rinfo->dir_dname[off - fi->offset],
 				    rinfo->dir_dname_len[off - fi->offset],
-				    pos, ino, ftype) < 0) {
+				    pos,
+				    ceph_translate_ino(inode->i_sb, ino), ftype) < 0) {
 				dout("filldir stopping us...\n");
 				return 0;
 			}
@@ -1024,14 +1027,13 @@ out_touch:
 }
 
 /*
- * When a dentry is released, clear the dir I_COMPLETE if it was part
- * of the current dir gen or if this is in the snapshot namespace.
+ * Release our ceph_dentry_info.
  */
-static void ceph_dentry_release(struct dentry *dentry)
+static void ceph_d_release(struct dentry *dentry)
 {
 	struct ceph_dentry_info *di = ceph_dentry(dentry);
 
-	dout("dentry_release %p\n", dentry);
+	dout("d_release %p\n", dentry);
 	if (di) {
 		ceph_dentry_lru_del(dentry);
 		if (di->lease_session)
@@ -1256,14 +1258,14 @@ const struct inode_operations ceph_dir_iops = {
 
 const struct dentry_operations ceph_dentry_ops = {
 	.d_revalidate = ceph_d_revalidate,
-	.d_release = ceph_dentry_release,
+	.d_release = ceph_d_release,
 };
 
 const struct dentry_operations ceph_snapdir_dentry_ops = {
 	.d_revalidate = ceph_snapdir_d_revalidate,
-	.d_release = ceph_dentry_release,
+	.d_release = ceph_d_release,
 };
 
 const struct dentry_operations ceph_snap_dentry_ops = {
-	.d_release = ceph_dentry_release,
+	.d_release = ceph_d_release,
 };
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 7d0e4a82d898..159b512d5a27 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -564,11 +564,19 @@ more:
 		 * start_request so that a tid has been assigned.
 		 */
		spin_lock(&ci->i_unsafe_lock);
-		list_add(&req->r_unsafe_item, &ci->i_unsafe_writes);
+		list_add_tail(&req->r_unsafe_item,
+			      &ci->i_unsafe_writes);
		spin_unlock(&ci->i_unsafe_lock);
		ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR);
 	}
+
 	ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
+	if (ret < 0 && req->r_safe_callback) {
+		spin_lock(&ci->i_unsafe_lock);
+		list_del_init(&req->r_unsafe_item);
+		spin_unlock(&ci->i_unsafe_lock);
+		ceph_put_cap_refs(ci, CEPH_CAP_FILE_WR);
+	}
 	}
 
 	if (file->f_flags & O_DIRECT)
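
[The fs/ceph/file.c hunk above makes two related changes: unsafe writes are now queued with list_add_tail() so i_unsafe_writes keeps submission order, and when ceph_osdc_wait_request() fails the request is unhooked and the CEPH_CAP_FILE_WR reference dropped on the spot, because the safe callback that would normally do that cleanup will never fire. A rough user-space sketch of the same bookkeeping pattern, with POSIX sys/queue.h and a pthread mutex standing in for the kernel list API and spinlock; all names below are illustrative, not from the patch:

#include <stdio.h>
#include <pthread.h>
#include <sys/queue.h>

/* Illustrative stand-in for a request tracked on i_unsafe_writes. */
struct unsafe_write {
	int tid;
	TAILQ_ENTRY(unsafe_write) item;
};

static TAILQ_HEAD(, unsafe_write) unsafe_writes =
	TAILQ_HEAD_INITIALIZER(unsafe_writes);
static pthread_mutex_t unsafe_lock = PTHREAD_MUTEX_INITIALIZER;

/* Pretend OSD round trip; fails tid 2 for the demo. */
static int wait_request(struct unsafe_write *req)
{
	return req->tid == 2 ? -5 : 0;
}

static int submit_write(struct unsafe_write *req)
{
	int ret;

	/* Tail insertion keeps the list in submission order, as the
	 * patch does with list_add_tail(). */
	pthread_mutex_lock(&unsafe_lock);
	TAILQ_INSERT_TAIL(&unsafe_writes, req, item);
	pthread_mutex_unlock(&unsafe_lock);

	ret = wait_request(req);
	if (ret < 0) {
		/* On failure no completion will fire, so drop the
		 * entry ourselves, mirroring the list_del_init() path. */
		pthread_mutex_lock(&unsafe_lock);
		TAILQ_REMOVE(&unsafe_writes, req, item);
		pthread_mutex_unlock(&unsafe_lock);
	}
	return ret;
}

int main(void)
{
	struct unsafe_write a = { .tid = 1 }, b = { .tid = 2 };

	printf("tid 1 -> %d\n", submit_write(&a));
	printf("tid 2 -> %d\n", submit_write(&b));
	return 0;
}

Keeping the list in submission order makes it cheap for a waiter to find the oldest outstanding request, which is presumably why the patch switches from list_add() to list_add_tail().]
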
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 193bfa5e9cbd..b54c97da1c43 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -36,6 +36,13 @@ static void ceph_vmtruncate_work(struct work_struct *work);
 /*
  * find or create an inode, given the ceph ino number
  */
+static int ceph_set_ino_cb(struct inode *inode, void *data)
+{
+	ceph_inode(inode)->i_vino = *(struct ceph_vino *)data;
+	inode->i_ino = ceph_vino_to_ino(*(struct ceph_vino *)data);
+	return 0;
+}
+
 struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino)
 {
 	struct inode *inode;
@@ -1030,9 +1037,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 		dout("fill_trace doing d_move %p -> %p\n",
 		     req->r_old_dentry, dn);
 
-		/* d_move screws up d_subdirs order */
-		ceph_i_clear(dir, CEPH_I_COMPLETE);
-
 		d_move(req->r_old_dentry, dn);
 		dout(" src %p '%.*s' dst %p '%.*s'\n",
 		     req->r_old_dentry,
@@ -1044,12 +1048,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 		   rehashing bug in vfs_rename_dir */
 		ceph_invalidate_dentry_lease(dn);
 
-		/* take overwritten dentry's readdir offset */
-		dout("dn %p gets %p offset %lld (old offset %lld)\n",
-		     req->r_old_dentry, dn, ceph_dentry(dn)->offset,
+		/*
+		 * d_move() puts the renamed dentry at the end of
+		 * d_subdirs.  We need to assign it an appropriate
+		 * directory offset so we can behave when holding
+		 * I_COMPLETE.
+		 */
+		ceph_set_dentry_offset(req->r_old_dentry);
+		dout("dn %p gets new offset %lld\n", req->r_old_dentry,
 		     ceph_dentry(req->r_old_dentry)->offset);
-		ceph_dentry(req->r_old_dentry)->offset =
-			ceph_dentry(dn)->offset;
 
 		dn = req->r_old_dentry;  /* use old_dentry */
 		in = dn->d_inode;
@@ -1809,7 +1816,7 @@ int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	err = ceph_do_getattr(inode, CEPH_STAT_CAP_INODE_ALL);
 	if (!err) {
 		generic_fillattr(inode, stat);
-		stat->ino = inode->i_ino;
+		stat->ino = ceph_translate_ino(inode->i_sb, inode->i_ino);
 		if (ceph_snap(inode) != CEPH_NOSNAP)
 			stat->dev = ceph_snap(inode);
 		else
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a1ee8fa3a8e7..f60b07b0feb0 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3215,9 +3215,15 @@ void ceph_mdsc_destroy(struct ceph_fs_client *fsc)
 {
 	struct ceph_mds_client *mdsc = fsc->mdsc;
 
+	dout("mdsc_destroy %p\n", mdsc);
 	ceph_mdsc_stop(mdsc);
+
+	/* flush out any connection work with references to us */
+	ceph_msgr_flush();
+
 	fsc->mdsc = NULL;
 	kfree(mdsc);
+	dout("mdsc_destroy %p done\n", mdsc);
 }
 
 
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index f40b9139e437..e86ec1155f8f 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -342,7 +342,7 @@ static int build_snap_context(struct ceph_snap_realm *realm)
 	num = 0;
 	snapc->seq = realm->seq;
 	if (parent) {
-		/* include any of parent's snaps occuring _after_ my
+		/* include any of parent's snaps occurring _after_ my
 		   parent became my parent */
 		for (i = 0; i < parent->cached_context->num_snaps; i++)
 			if (parent->cached_context->snaps[i] >=
@@ -463,8 +463,8 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
 
 	dout("queue_cap_snap %p cap_snap %p queuing under %p\n", inode,
 	     capsnap, snapc);
-	igrab(inode);
+	ihold(inode);
 
 	atomic_set(&capsnap->nref, 1);
 	capsnap->ci = ci;
 	INIT_LIST_HEAD(&capsnap->ci_item);
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 9c5085465a63..f2f77fd3c14c 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -131,6 +131,7 @@ enum {
 	Opt_rbytes,
 	Opt_norbytes,
 	Opt_noasyncreaddir,
+	Opt_ino32,
 };
 
 static match_table_t fsopt_tokens = {
@@ -150,6 +151,7 @@ static match_table_t fsopt_tokens = {
 	{Opt_rbytes, "rbytes"},
 	{Opt_norbytes, "norbytes"},
 	{Opt_noasyncreaddir, "noasyncreaddir"},
+	{Opt_ino32, "ino32"},
 	{-1, NULL}
 };
 
@@ -225,6 +227,9 @@ static int parse_fsopt_token(char *c, void *private)
 	case Opt_noasyncreaddir:
 		fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR;
 		break;
+	case Opt_ino32:
+		fsopt->flags |= CEPH_MOUNT_OPT_INO32;
+		break;
 	default:
 		BUG_ON(token);
 	}
@@ -288,7 +293,7 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt,
 	fsopt->sb_flags = flags;
 	fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
 
-	fsopt->rsize = CEPH_MOUNT_RSIZE_DEFAULT;
+	fsopt->rsize = CEPH_RSIZE_DEFAULT;
 	fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
 	fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
 	fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
@@ -348,7 +353,7 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
 
 	if (opt->name)
 		seq_printf(m, ",name=%s", opt->name);
-	if (opt->secret)
+	if (opt->key)
 		seq_puts(m, ",secret=<hidden>");
 
 	if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
@@ -370,7 +375,7 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
 
 	if (fsopt->wsize)
 		seq_printf(m, ",wsize=%d", fsopt->wsize);
-	if (fsopt->rsize != CEPH_MOUNT_RSIZE_DEFAULT)
+	if (fsopt->rsize != CEPH_RSIZE_DEFAULT)
 		seq_printf(m, ",rsize=%d", fsopt->rsize);
 	if (fsopt->congestion_kb != default_congestion_kb())
 		seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 20b907d76ae2..619fe719968f 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -27,6 +27,7 @@
 #define CEPH_MOUNT_OPT_DIRSTAT         (1<<4) /* `cat dirname` for stats */
 #define CEPH_MOUNT_OPT_RBYTES          (1<<5) /* dir st_bytes = rbytes */
 #define CEPH_MOUNT_OPT_NOASYNCREADDIR  (1<<7) /* no dcache readdir */
+#define CEPH_MOUNT_OPT_INO32           (1<<8) /* 32 bit inos */
 
 #define CEPH_MOUNT_OPT_DEFAULT    (CEPH_MOUNT_OPT_RBYTES)
 
@@ -35,6 +36,7 @@
 #define ceph_test_mount_opt(fsc, opt) \
 	(!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt))
 
+#define CEPH_RSIZE_DEFAULT             (512*1024) /* readahead */
 #define CEPH_MAX_READDIR_DEFAULT        1024
 #define CEPH_MAX_READDIR_BYTES_DEFAULT  (512*1024)
 #define CEPH_SNAPDIRNAME_DEFAULT ".snap"
@@ -319,6 +321,16 @@ static inline struct ceph_inode_info *ceph_inode(struct inode *inode)
 	return container_of(inode, struct ceph_inode_info, vfs_inode);
 }
 
+static inline struct ceph_fs_client *ceph_inode_to_client(struct inode *inode)
+{
+	return (struct ceph_fs_client *)inode->i_sb->s_fs_info;
+}
+
+static inline struct ceph_fs_client *ceph_sb_to_client(struct super_block *sb)
+{
+	return (struct ceph_fs_client *)sb->s_fs_info;
+}
+
 static inline struct ceph_vino ceph_vino(struct inode *inode)
 {
 	return ceph_inode(inode)->i_vino;
@@ -327,19 +339,49 @@ static inline struct ceph_vino ceph_vino(struct inode *inode)
 /*
  * ino_t is <64 bits on many architectures, blech.
  *
- * don't include snap in ino hash, at least for now.
+ *               i_ino (kernel inode)   st_ino (userspace)
+ * i386          32                     32
+ * x86_64+ino32  64                     32
+ * x86_64        64                     64
+ */
+static inline u32 ceph_ino_to_ino32(ino_t ino)
+{
+	ino ^= ino >> (sizeof(ino) * 8 - 32);
+	if (!ino)
+		ino = 1;
+	return ino;
+}
+
+/*
+ * kernel i_ino value
 */
 static inline ino_t ceph_vino_to_ino(struct ceph_vino vino)
 {
 	ino_t ino = (ino_t)vino.ino;  /* ^ (vino.snap << 20); */
 #if BITS_PER_LONG == 32
-	ino ^= vino.ino >> (sizeof(u64)-sizeof(ino_t)) * 8;
-	if (!ino)
-		ino = 1;
+	ino = ceph_ino_to_ino32(ino);
 #endif
 	return ino;
 }
 
+/*
+ * user-visible ino (stat, filldir)
+ */
+#if BITS_PER_LONG == 32
+static inline ino_t ceph_translate_ino(struct super_block *sb, ino_t ino)
+{
+	return ino;
+}
+#else
+static inline ino_t ceph_translate_ino(struct super_block *sb, ino_t ino)
+{
+	if (ceph_test_mount_opt(ceph_sb_to_client(sb), INO32))
+		ino = ceph_ino_to_ino32(ino);
+	return ino;
+}
+#endif
+
+
 /* for printf-style formatting */
 #define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap
 
@@ -428,13 +470,6 @@ static inline loff_t ceph_make_fpos(unsigned frag, unsigned off)
 	return ((loff_t)frag << 32) | (loff_t)off;
 }
 
-static inline int ceph_set_ino_cb(struct inode *inode, void *data)
-{
-	ceph_inode(inode)->i_vino = *(struct ceph_vino *)data;
-	inode->i_ino = ceph_vino_to_ino(*(struct ceph_vino *)data);
-	return 0;
-}
-
 /*
  * caps helpers
  */
@@ -503,15 +538,6 @@ extern void ceph_reservation_status(struct ceph_fs_client *client,
 				    int *total, int *avail, int *used,
 				    int *reserved, int *min);
 
-static inline struct ceph_fs_client *ceph_inode_to_client(struct inode *inode)
-{
-	return (struct ceph_fs_client *)inode->i_sb->s_fs_info;
-}
-
-static inline struct ceph_fs_client *ceph_sb_to_client(struct super_block *sb)
-{
-	return (struct ceph_fs_client *)sb->s_fs_info;
-}
 
 
 /*
diff --git a/fs/cifs/AUTHORS b/fs/cifs/AUTHORS
index 7f7fa3c302af..ea940b1db77b 100644
--- a/fs/cifs/AUTHORS
+++ b/fs/cifs/AUTHORS
@@ -35,7 +35,7 @@ Adrian Bunk (kcalloc cleanups)
 Miklos Szeredi
 Kazeon team for various fixes especially for 2.4 version.
 Asser Ferno (Change Notify support)
-Shaggy (Dave Kleikamp) for inumerable small fs suggestions and some good cleanup
+Shaggy (Dave Kleikamp) for innumerable small fs suggestions and some good cleanup
 Gunter Kukkukk (testing and suggestions for support of old servers)
 Igor Mammedov (DFS support)
 Jeff Layton (many, many fixes, as well as great work on the cifs Kerberos code)
diff --git a/fs/cifs/README b/fs/cifs/README
index fe1683590828..74ab165fc646 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -685,22 +685,6 @@ LinuxExtensionsEnabled If set to one then the client will attempt to
 			support and want to map the uid and gid fields
 			to values supplied at mount (rather than the
 			actual values, then set this to zero. (default 1)
-Experimental		When set to 1 used to enable certain experimental
-			features (currently enables multipage writes
-			when signing is enabled, the multipage write
-			performance enhancement was disabled when
-			signing turned on in case buffer was modified
-			just before it was sent, also this flag will
-			be used to use the new experimental directory change
-			notification code). When set to 2 enables
-			an additional experimental feature, "raw ntlmssp"
-			session establishment support (which allows
-			specifying "sec=ntlmssp" on mount). The Linux cifs
-			module will use ntlmv2 authentication encapsulated
-			in "raw ntlmssp" (not using SPNEGO) when
-			"sec=ntlmssp" is specified on mount.
-			This support also requires building cifs with
-			the CONFIG_CIFS_EXPERIMENTAL configuration flag.
 
 These experimental features and tracing can be enabled by changing flags in
 /proc/fs/cifs (after the cifs module has been installed or built into the
diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c
index e654dfd092c3..53d57a3fe427 100644
--- a/fs/cifs/cache.c
+++ b/fs/cifs/cache.c
@@ -50,7 +50,7 @@ void cifs_fscache_unregister(void)
  */
 struct cifs_server_key {
 	uint16_t	family;		/* address family */
-	uint16_t	port;		/* IP port */
+	__be16		port;		/* IP port */
 	union {
 		struct in_addr	ipv4_addr;
 		struct in6_addr	ipv6_addr;
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 65829d32128c..30d01bc90855 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -423,7 +423,6 @@ static const struct file_operations cifs_lookup_cache_proc_fops;
 static const struct file_operations traceSMB_proc_fops;
 static const struct file_operations cifs_multiuser_mount_proc_fops;
 static const struct file_operations cifs_security_flags_proc_fops;
-static const struct file_operations cifs_experimental_proc_fops;
 static const struct file_operations cifs_linux_ext_proc_fops;
 
 void
@@ -441,8 +440,6 @@ cifs_proc_init(void)
 	proc_create("cifsFYI", 0, proc_fs_cifs, &cifsFYI_proc_fops);
 	proc_create("traceSMB", 0, proc_fs_cifs, &traceSMB_proc_fops);
 	proc_create("OplockEnabled", 0, proc_fs_cifs, &cifs_oplock_proc_fops);
-	proc_create("Experimental", 0, proc_fs_cifs,
-		    &cifs_experimental_proc_fops);
 	proc_create("LinuxExtensionsEnabled", 0, proc_fs_cifs,
 		    &cifs_linux_ext_proc_fops);
 	proc_create("MultiuserMount", 0, proc_fs_cifs,
@@ -469,7 +466,6 @@ cifs_proc_clean(void)
 	remove_proc_entry("OplockEnabled", proc_fs_cifs);
 	remove_proc_entry("SecurityFlags", proc_fs_cifs);
 	remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs);
-	remove_proc_entry("Experimental", proc_fs_cifs);
 	remove_proc_entry("LookupCacheEnabled", proc_fs_cifs);
 	remove_proc_entry("fs/cifs", NULL);
 }
@@ -550,45 +546,6 @@ static const struct file_operations cifs_oplock_proc_fops = {
 	.write		= cifs_oplock_proc_write,
 };
 
-static int cifs_experimental_proc_show(struct seq_file *m, void *v)
-{
-	seq_printf(m, "%d\n", experimEnabled);
-	return 0;
-}
-
-static int cifs_experimental_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, cifs_experimental_proc_show, NULL);
-}
-
-static ssize_t cifs_experimental_proc_write(struct file *file,
-		const char __user *buffer, size_t count, loff_t *ppos)
-{
-	char c;
-	int rc;
-
-	rc = get_user(c, buffer);
-	if (rc)
-		return rc;
-	if (c == '0' || c == 'n' || c == 'N')
-		experimEnabled = 0;
-	else if (c == '1' || c == 'y' || c == 'Y')
-		experimEnabled = 1;
-	else if (c == '2')
-		experimEnabled = 2;
-
-	return count;
-}
-
-static const struct file_operations cifs_experimental_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= cifs_experimental_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-	.write		= cifs_experimental_proc_write,
-};
-
 static int cifs_linux_ext_proc_show(struct seq_file *m, void *v)
 {
 	seq_printf(m, "%d\n", linuxExtEnabled);
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 0a265ad9e426..2b68ac57d97d 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -53,7 +53,7 @@ void cifs_dfs_release_automount_timer(void)
 *
 * Extracts sharename form full UNC.
 * i.e. strips from UNC trailing path that is not part of share
-* name and fixup missing '\' in the begining of DFS node refferal
+* name and fixup missing '\' in the beginning of DFS node refferal
 * if necessary.
 * Returns pointer to share name on success or ERR_PTR on error.
 * Caller is responsible for freeing returned string.
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 4dfba8283165..33d221394aca 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -113,7 +113,7 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
 		   MAX_MECH_STR_LEN +
 		   UID_KEY_LEN + (sizeof(uid_t) * 2) +
 		   CREDUID_KEY_LEN + (sizeof(uid_t) * 2) +
-		   USER_KEY_LEN + strlen(sesInfo->userName) +
+		   USER_KEY_LEN + strlen(sesInfo->user_name) +
 		   PID_KEY_LEN + (sizeof(pid_t) * 2) + 1;
 
 	spnego_key = ERR_PTR(-ENOMEM);
@@ -153,7 +153,7 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
 	sprintf(dp, ";creduid=0x%x", sesInfo->cred_uid);
 
 	dp = description + strlen(description);
-	sprintf(dp, ";user=%s", sesInfo->userName);
+	sprintf(dp, ";user=%s", sesInfo->user_name);
 
 	dp = description + strlen(description);
 	sprintf(dp, ";pid=0x%x", current->pid);
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index fc0fd4fde306..23d43cde4306 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -90,7 +90,7 @@ cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
 	case UNI_COLON:
 		*target = ':';
 		break;
-	case UNI_ASTERIK:
+	case UNI_ASTERISK:
 		*target = '*';
 		break;
 	case UNI_QUESTION:
@@ -264,40 +264,40 @@ cifs_strndup_from_ucs(const char *src, const int maxlen, const bool is_unicode,
  * names are little endian 16 bit Unicode on the wire
 */
 int
-cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
+cifsConvertToUCS(__le16 *target, const char *source, int srclen,
		 const struct nls_table *cp, int mapChars)
 {
	int i, j, charlen;
-	int len_remaining = maxlen;
	char src_char;
-	__u16 temp;
+	__le16 dst_char;
+	wchar_t tmp;
 
	if (!mapChars)
		return cifs_strtoUCS(target, source, PATH_MAX, cp);
 
-	for (i = 0, j = 0; i < maxlen; j++) {
+	for (i = 0, j = 0; i < srclen; j++) {
		src_char = source[i];
		switch (src_char) {
		case 0:
-			put_unaligned_le16(0, &target[j]);
+			put_unaligned(0, &target[j]);
			goto ctoUCS_out;
		case ':':
-			temp = UNI_COLON;
+			dst_char = cpu_to_le16(UNI_COLON);
			break;
		case '*':
-			temp = UNI_ASTERIK;
+			dst_char = cpu_to_le16(UNI_ASTERISK);
			break;
		case '?':
-			temp = UNI_QUESTION;
+			dst_char = cpu_to_le16(UNI_QUESTION);
			break;
		case '<':
-			temp = UNI_LESSTHAN;
+			dst_char = cpu_to_le16(UNI_LESSTHAN);
			break;
		case '>':
-			temp = UNI_GRTRTHAN;
+			dst_char = cpu_to_le16(UNI_GRTRTHAN);
			break;
		case '|':
-			temp = UNI_PIPE;
+			dst_char = cpu_to_le16(UNI_PIPE);
			break;
		/*
		 * FIXME: We can not handle remapping backslash (UNI_SLASH)
@@ -305,17 +305,17 @@ cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
		 * as they use backslash as separator.
		 */
		default:
-			charlen = cp->char2uni(source+i, len_remaining,
-					       &temp);
+			charlen = cp->char2uni(source + i, srclen - i, &tmp);
+			dst_char = cpu_to_le16(tmp);
+
			/*
			 * if no match, use question mark, which at least in
			 * some cases serves as wild card
			 */
			if (charlen < 1) {
-				temp = 0x003f;
+				dst_char = cpu_to_le16(0x003f);
				charlen = 1;
			}
-			len_remaining -= charlen;
			/*
			 * character may take more than one byte in the source
			 * string, but will take exactly two bytes in the
@@ -324,9 +324,8 @@ cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
			i += charlen;
			continue;
		}
-		put_unaligned_le16(temp, &target[j]);
+		put_unaligned(dst_char, &target[j]);
		i++; /* move to next char in source string */
-		len_remaining--;
	}
 
 ctoUCS_out:
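
[The cifsConvertToUCS() rewrite above fixes two independent problems: the multibyte path now hands char2uni() the real count of remaining source bytes (srclen - i) rather than a separately maintained counter that could drift, and every stored value goes through cpu_to_le16() before put_unaligned(), so the wire string is little endian even on big-endian hosts. The remapping itself just shifts the NTFS-reserved characters into the 0xF000 private-use range; a user-space sketch of that mapping together with an explicit little-endian store, with illustrative function names:

#include <stdio.h>
#include <stdint.h>

/* Shift characters that NTFS refuses in names (* ? : < > |) into the
 * 0xF000 private-use range, as cifsConvertToUCS does; plain ASCII
 * passes through unchanged. */
static uint16_t remap_reserved(char c)
{
	switch (c) {
	case '*': case '?': case ':':
	case '<': case '>': case '|':
		return 0xF000 + (uint8_t)c;
	default:
		return (uint8_t)c;
	}
}

/* Store a 16-bit code point low byte first, i.e. little endian,
 * regardless of the host byte order. */
static void store_le16(uint8_t *dst, uint16_t v)
{
	dst[0] = v & 0xff;
	dst[1] = v >> 8;
}

int main(void)
{
	const char *name = "a:b*c";
	uint8_t wire[2];

	for (const char *p = name; *p; p++) {
		store_le16(wire, remap_reserved(*p));
		printf("'%c' -> %02x %02x\n", *p, wire[0], wire[1]);
	}
	return 0;
}

So ':' becomes the code point 0xF03A (stored as 3a f0 on the wire), matching the UNI_COLON definition in cifs_unicode.h.]
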
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index 7fe6b52df507..644dd882a560 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h
@@ -44,7 +44,7 @@
 * reserved symbols (along with \ and /), otherwise illegal to store
 * in filenames in NTFS
 */
-#define UNI_ASTERIK     (__u16) ('*' + 0xF000)
+#define UNI_ASTERISK    (__u16) ('*' + 0xF000)
 #define UNI_QUESTION    (__u16) ('?' + 0xF000)
 #define UNI_COLON       (__u16) (':' + 0xF000)
 #define UNI_GRTRTHAN    (__u16) ('>' + 0xF000)
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index a51585f9852b..d1a016be73ba 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -30,12 +30,13 @@
 #include <linux/ctype.h>
 #include <linux/random.h>
 
-/* Calculate and return the CIFS signature based on the mac key and SMB PDU */
-/* the 16 byte signature must be allocated by the caller */
-/* Note we only use the 1st eight bytes */
-/* Note that the smb header signature field on input contains the
-   sequence number before this function is called */
-
+/*
+ * Calculate and return the CIFS signature based on the mac key and SMB PDU.
+ * The 16 byte signature must be allocated by the caller. Note we only use the
+ * 1st eight bytes and that the smb header signature field on input contains
+ * the sequence number before this function is called. Also, this function
+ * should be called with the server->srv_mutex held.
+ */
 static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
				struct TCP_Server_Info *server, char *signature)
 {
@@ -209,8 +210,10 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
					cpu_to_le32(expected_sequence_number);
	cifs_pdu->Signature.Sequence.Reserved = 0;
 
+	mutex_lock(&server->srv_mutex);
	rc = cifs_calculate_signature(cifs_pdu, server,
		what_we_think_sig_should_be);
+	mutex_unlock(&server->srv_mutex);
 
	if (rc)
		return rc;
@@ -469,15 +472,15 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses, char *ntlmv2_hash,
		return rc;
	}
 
-	/* convert ses->userName to unicode and uppercase */
-	len = strlen(ses->userName);
+	/* convert ses->user_name to unicode and uppercase */
+	len = strlen(ses->user_name);
	user = kmalloc(2 + (len * 2), GFP_KERNEL);
	if (user == NULL) {
		cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n");
		rc = -ENOMEM;
		goto calc_exit_2;
	}
-	len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp);
+	len = cifs_strtoUCS((__le16 *)user, ses->user_name, len, nls_cp);
	UniStrupr(user);
 
	crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
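
[The cifs_verify_signature() hunk above takes server->srv_mutex around the signature computation, matching the comment added to cifs_calculate_signature(): the per-server hash transform and the sequence counter are shared state, so two PDUs signed concurrently could interleave their updates and both end up with bad signatures. A toy pthread sketch of the rule that one lock must cover both the counter and the hash state; the "hash" below is a stand-in, not the real HMAC-MD5:

#include <stdio.h>
#include <pthread.h>

/* Shared per-server signing state: the sequence number and the (here
 * trivialized) hash must advance together, so one mutex guards both,
 * as srv_mutex does in the patch. */
static pthread_mutex_t srv_mutex = PTHREAD_MUTEX_INITIALIZER;
static unsigned int sequence_number;

static unsigned int sign_pdu(unsigned int payload)
{
	unsigned int sig;

	pthread_mutex_lock(&srv_mutex);
	sig = payload ^ (sequence_number * 2654435761u); /* toy mix */
	sequence_number++;
	pthread_mutex_unlock(&srv_mutex);
	return sig;
}

static void *sender(void *arg)
{
	for (int i = 0; i < 3; i++)
		printf("sig=%08x\n", sign_pdu((unsigned int)(long)arg + i));
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;

	pthread_create(&t1, NULL, sender, (void *)0x100L);
	pthread_create(&t2, NULL, sender, (void *)0x200L);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	return 0;
}

Without the lock, the counter increment and the signature that consumes it could be torn apart between threads, which is exactly the race the patch closes for the verify path.]
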
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index f2970136d17d..5c412b33cd7c 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -53,7 +53,6 @@ int cifsFYI = 0;
 int cifsERROR = 1;
 int traceSMB = 0;
 unsigned int oplockEnabled = 1;
-unsigned int experimEnabled = 0;
 unsigned int linuxExtEnabled = 1;
 unsigned int lookupCacheEnabled = 1;
 unsigned int multiuser_mount = 0;
@@ -127,6 +126,7 @@ cifs_read_super(struct super_block *sb, void *data,
		kfree(cifs_sb);
		return rc;
	}
+	cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages;
 
 #ifdef CONFIG_CIFS_DFS_UPCALL
	/* copy mount params to sb for use in submounts */
@@ -409,8 +409,8 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
 
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)
		seq_printf(s, ",multiuser");
-	else if (tcon->ses->userName)
-		seq_printf(s, ",username=%s", tcon->ses->userName);
+	else if (tcon->ses->user_name)
+		seq_printf(s, ",username=%s", tcon->ses->user_name);
 
	if (tcon->ses->domainName)
		seq_printf(s, ",domain=%s", tcon->ses->domainName);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 17afb0fbcaed..a5d1106fcbde 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -37,10 +37,9 @@
 
 #define MAX_TREE_SIZE (2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1)
 #define MAX_SERVER_SIZE 15
-#define MAX_SHARE_SIZE 64	/* used to be 20, this should still be enough */
-#define MAX_USERNAME_SIZE 32	/* 32 is to allow for 15 char names + null
-				   termination then *2 for unicode versions */
-#define MAX_PASSWORD_SIZE 512	/* max for windows seems to be 256 wide chars */
+#define MAX_SHARE_SIZE 80
+#define MAX_USERNAME_SIZE 256	/* reasonable maximum for current servers */
+#define MAX_PASSWORD_SIZE 512	/* max for windows seems to be 256 wide chars */
 
 #define CIFS_MIN_RCV_POOL 4
 
@@ -92,7 +91,8 @@ enum statusEnum {
	CifsNew = 0,
	CifsGood,
	CifsExiting,
-	CifsNeedReconnect
+	CifsNeedReconnect,
+	CifsNeedNegotiate
 };
 
 enum securityEnum {
@@ -274,7 +274,7 @@ struct cifsSesInfo {
	int capabilities;
	char serverName[SERVER_NAME_LEN_WITH_NULL * 2];	/* BB make bigger for
				TCP names - will ipv6 and sctp addresses fit? */
-	char userName[MAX_USERNAME_SIZE + 1];
+	char *user_name;
	char *domainName;
	char *password;
	struct session_key auth_key;
@@ -817,7 +817,6 @@ GLOBAL_EXTERN unsigned int multiuser_mount; /* if enabled allows new sessions
				have the uid/password or Kerberos credential
				or equivalent for current user */
 GLOBAL_EXTERN unsigned int oplockEnabled;
-GLOBAL_EXTERN unsigned int experimEnabled;
 GLOBAL_EXTERN unsigned int lookupCacheEnabled;
 GLOBAL_EXTERN unsigned int global_secflags;	/* if on, session setup sent
				with more secure ntlmssp2 challenge/resp */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 904aa47e3515..df959bae6728 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -142,9 +142,9 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command)
	 */
	while (server->tcpStatus == CifsNeedReconnect) {
		wait_event_interruptible_timeout(server->response_q,
-			(server->tcpStatus == CifsGood), 10 * HZ);
+			(server->tcpStatus != CifsNeedReconnect), 10 * HZ);
 
-		/* is TCP session is reestablished now ?*/
+		/* are we still trying to reconnect? */
		if (server->tcpStatus != CifsNeedReconnect)
			break;
 
@@ -729,7 +729,7 @@ CIFSSMBEcho(struct TCP_Server_Info *server)
		return rc;
 
	/* set up echo request */
-	smb->hdr.Tid = cpu_to_le16(0xffff);
+	smb->hdr.Tid = 0xffff;
	smb->hdr.WordCount = 1;
	put_unaligned_le16(1, &smb->EchoCount);
	put_bcc_le(1, &smb->hdr);
@@ -1884,10 +1884,10 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
				__constant_cpu_to_le16(CIFS_WRLCK))
			pLockData->fl_type = F_WRLCK;
 
-			pLockData->fl_start = parm_data->start;
-			pLockData->fl_end = parm_data->start +
-					parm_data->length - 1;
-			pLockData->fl_pid = parm_data->pid;
+			pLockData->fl_start = le64_to_cpu(parm_data->start);
+			pLockData->fl_end = pLockData->fl_start +
+				le64_to_cpu(parm_data->length) - 1;
+			pLockData->fl_pid = le32_to_cpu(parm_data->pid);
		}
	}
 
@@ -5247,7 +5247,7 @@ cifs_fill_unix_set_info(FILE_UNIX_BASIC_INFO *data_offset,
	 * Samba server ignores set of file size to zero due to bugs in some
	 * older clients, but we should be precise - we use SetFileSize to
	 * set file size and do not want to truncate file size to zero
-	 * accidently as happened on one Samba server beta by putting
+	 * accidentally as happened on one Samba server beta by putting
	 * zero instead of -1 here
	 */
	data_offset->EndOfFile = cpu_to_le64(NO_CHANGE_64);
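
[The CIFSSMBPosixLock() hunk above adds the le64_to_cpu()/le32_to_cpu() conversions that were missing: start, length and pid arrive little endian off the wire and were previously used raw, which only happened to work on little-endian machines. A user-space sketch of decoding such fields portably, here using glibc's le64toh()/le32toh(); the struct layout is invented for illustration:

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <endian.h>	/* le64toh/le32toh on glibc and musl */

/* Invented stand-in for the lock reply's wire fields: everything the
 * server sends is little endian, whatever the host byte order is. */
struct wire_lock {
	uint64_t start;
	uint64_t length;
	uint32_t pid;
} __attribute__((packed));

int main(void)
{
	/* 20 bytes as they would arrive off the socket. */
	const uint8_t buf[20] = {
		0x00, 0x10, 0, 0, 0, 0, 0, 0,	/* start  = 0x1000 */
		0x00, 0x02, 0, 0, 0, 0, 0, 0,	/* length = 0x200  */
		0x39, 0x05, 0, 0,		/* pid    = 1337   */
	};
	struct wire_lock wl;

	memcpy(&wl, buf, sizeof(wl));

	/* Convert at the point of use, exactly once, like the patch. */
	printf("start=%#llx end=%#llx pid=%u\n",
	       (unsigned long long)le64toh(wl.start),
	       (unsigned long long)(le64toh(wl.start) +
				    le64toh(wl.length) - 1),
	       le32toh(wl.pid));
	return 0;
}

The same hunk also stops wrapping the already-native Tid field of the echo request in cpu_to_le16(), which is the mirror-image mistake: converting a value that was never in wire order.]
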
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 8d6c17ab593d..db9d55b507d0 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -199,8 +199,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
	}
	spin_unlock(&GlobalMid_Lock);
 
-	while ((server->tcpStatus != CifsExiting) &&
-	       (server->tcpStatus != CifsGood)) {
+	while (server->tcpStatus == CifsNeedReconnect) {
		try_to_freeze();
 
		/* we should try only the port we connected to before */
@@ -212,7 +211,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
			atomic_inc(&tcpSesReconnectCount);
			spin_lock(&GlobalMid_Lock);
			if (server->tcpStatus != CifsExiting)
-				server->tcpStatus = CifsGood;
+				server->tcpStatus = CifsNeedNegotiate;
			spin_unlock(&GlobalMid_Lock);
		}
	}
@@ -248,24 +247,24 @@ static int check2ndT2(struct smb_hdr *pSMB, unsigned int maxBufSize)
	total_data_size = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount);
	data_in_this_rsp = get_unaligned_le16(&pSMBt->t2_rsp.DataCount);
 
-	remaining = total_data_size - data_in_this_rsp;
-
-	if (remaining == 0)
+	if (total_data_size == data_in_this_rsp)
		return 0;
-	else if (remaining < 0) {
+	else if (total_data_size < data_in_this_rsp) {
		cFYI(1, "total data %d smaller than data in frame %d",
		     total_data_size, data_in_this_rsp);
		return -EINVAL;
-	} else {
-		cFYI(1, "missing %d bytes from transact2, check next response",
-		     remaining);
-		if (total_data_size > maxBufSize) {
-			cERROR(1, "TotalDataSize %d is over maximum buffer %d",
-				total_data_size, maxBufSize);
-			return -EINVAL;
-		}
-		return remaining;
	}
+
+	remaining = total_data_size - data_in_this_rsp;
+
+	cFYI(1, "missing %d bytes from transact2, check next response",
+	     remaining);
+	if (total_data_size > maxBufSize) {
+		cERROR(1, "TotalDataSize %d is over maximum buffer %d",
+			total_data_size, maxBufSize);
+		return -EINVAL;
+	}
+	return remaining;
 }
 
 static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
@@ -421,7 +420,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
		pdu_length = 4; /* enough to get RFC1001 header */
 
 incomplete_rcv:
-	if (echo_retries > 0 &&
+	if (echo_retries > 0 && server->tcpStatus == CifsGood &&
	    time_after(jiffies, server->lstrp +
			(echo_retries * SMB_ECHO_INTERVAL))) {
		cERROR(1, "Server %s has not responded in %d seconds. "
@@ -881,7 +880,8 @@ cifs_parse_mount_options(char *options, const char *devname,
				/* null user, ie anonymous, authentication */
				vol->nullauth = 1;
			}
-			if (strnlen(value, 200) < 200) {
+			if (strnlen(value, MAX_USERNAME_SIZE) <
+						MAX_USERNAME_SIZE) {
				vol->username = value;
			} else {
				printk(KERN_WARNING "CIFS: username too long\n");
@@ -1472,7 +1472,7 @@ srcip_matches(struct sockaddr *srcaddr, struct sockaddr *rhs)
 static bool
 match_port(struct TCP_Server_Info *server, struct sockaddr *addr)
 {
-	unsigned short int port, *sport;
+	__be16 port, *sport;
 
	switch (addr->sa_family) {
	case AF_INET:
@@ -1572,7 +1572,7 @@ match_security(struct TCP_Server_Info *server, struct smb_vol *vol)
			return false;
	}
 
-	/* now check if signing mode is acceptible */
+	/* now check if signing mode is acceptable */
	if ((secFlags & CIFSSEC_MAY_SIGN) == 0 &&
	    (server->secMode & SECMODE_SIGN_REQUIRED))
		return false;
@@ -1765,6 +1765,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
		module_put(THIS_MODULE);
		goto out_err_crypto_release;
	}
+	tcp_ses->tcpStatus = CifsNeedNegotiate;
 
	/* thread spawned, put it on the list */
	spin_lock(&cifs_tcp_ses_lock);
@@ -1808,7 +1809,9 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol)
			break;
		default:
			/* anything else takes username/password */
-			if (strncmp(ses->userName, vol->username,
+			if (ses->user_name == NULL)
+				continue;
+			if (strncmp(ses->user_name, vol->username,
				    MAX_USERNAME_SIZE))
				continue;
			if (strlen(vol->username) != 0 &&
@@ -1851,6 +1854,8 @@ cifs_put_smb_ses(struct cifsSesInfo *ses)
		cifs_put_tcp_session(server);
 }
 
+static bool warned_on_ntlm; /* globals init to false automatically */
+
 static struct cifsSesInfo *
 cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
 {
@@ -1906,9 +1911,11 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
	else
		sprintf(ses->serverName, "%pI4", &addr->sin_addr);
 
-	if (volume_info->username)
-		strncpy(ses->userName, volume_info->username,
-			MAX_USERNAME_SIZE);
+	if (volume_info->username) {
+		ses->user_name = kstrdup(volume_info->username, GFP_KERNEL);
+		if (!ses->user_name)
+			goto get_ses_fail;
+	}
 
	/* volume_info->password freed at unmount */
	if (volume_info->password) {
@@ -1923,6 +1930,15 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
	}
	ses->cred_uid = volume_info->cred_uid;
	ses->linux_uid = volume_info->linux_uid;
+
+	/* ntlmv2 is much stronger than ntlm security, and has been broadly
+	supported for many years, time to update default security mechanism */
+	if ((volume_info->secFlg == 0) && warned_on_ntlm == false) {
+		warned_on_ntlm = true;
+		cERROR(1, "default security mechanism requested.  The default "
+			"security mechanism will be upgraded from ntlm to "
+			"ntlmv2 in kernel release 2.6.41");
+	}
	ses->overrideSecFlg = volume_info->secFlg;
 
	mutex_lock(&ses->session_mutex);
@@ -2276,7 +2292,7 @@ static int
 generic_ip_connect(struct TCP_Server_Info *server)
 {
	int rc = 0;
-	unsigned short int sport;
+	__be16 sport;
	int slen, sfamily;
	struct socket *socket = server->ssocket;
	struct sockaddr *saddr;
@@ -2361,7 +2377,7 @@ generic_ip_connect(struct TCP_Server_Info *server)
 static int
 ip_connect(struct TCP_Server_Info *server)
 {
-	unsigned short int *sport;
+	__be16 *sport;
	struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr;
	struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr;
 
@@ -2826,7 +2842,7 @@ try_mount_again:
 
 remote_path_check:
	/* check if a whole path (including prepath) is not remote */
-	if (!rc && cifs_sb->prepathlen && tcon) {
+	if (!rc && tcon) {
		/* build_path_to_root works only when we have a valid tcon */
		full_path = cifs_build_path_to_root(cifs_sb, tcon);
		if (full_path == NULL) {
@@ -2933,7 +2949,7 @@ mount_fail_check:
	if (mount_data != mount_data_global)
		kfree(mount_data);
	/* If find_unc succeeded then rc == 0 so we can not end */
-	/* up accidently freeing someone elses tcon struct */
+	/* up accidentally freeing someone elses tcon struct */
	if (tcon)
		cifs_put_tcon(tcon);
	else if (pSesInfo)
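
[Several connect.c hunks above (match_port, generic_ip_connect, ip_connect) retype port variables from unsigned short to __be16 so that sparse can flag any spot where a network-order value is compared or assigned without conversion. The working rule is to keep ports in network byte order end to end and convert only at the boundary where a human-readable number is needed; a minimal user-space illustration with htons()/ntohs():

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>	/* htons, ntohs */

int main(void)
{
	/* Ports live in network byte order, as they do inside a
	 * struct sockaddr_in; convert once, at the edge. */
	uint16_t wire_port = htons(445);
	uint16_t want = htons(445);

	/* Two network-order values compare directly, no conversion. */
	printf("match: %d\n", wire_port == want);

	/* Convert back to host order only to print for a human. */
	printf("port: %u\n", ntohs(wire_port));
	return 0;
}

In the kernel the __be16 annotation buys the same discipline at compile time: mixing a __be16 with a plain integer without ntohs()/htons() draws a sparse warning instead of a latent big-endian bug.]
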
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index dd5f22918c33..9ea65cf36714 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -189,7 +189,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
			 inode->i_sb, mode, oflags, &oplock, &fileHandle, xid);
	/* EIO could indicate that (posix open) operation is not
	   supported, despite what server claimed in capability
-	   negotation.  EREMOTE indicates DFS junction, which is not
+	   negotiation.  EREMOTE indicates DFS junction, which is not
	   handled in posix open */
 
	if (rc == 0) {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index e964b1cd5dd0..faf59529e847 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -575,8 +575,10 @@ reopen_error_exit:
 
 int cifs_close(struct inode *inode, struct file *file)
 {
-	cifsFileInfo_put(file->private_data);
-	file->private_data = NULL;
+	if (file->private_data != NULL) {
+		cifsFileInfo_put(file->private_data);
+		file->private_data = NULL;
+	}
 
	/* return code from the ->release op is always ignored */
	return 0;
@@ -970,6 +972,9 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file,
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
+			struct kvec iov[2];
+			unsigned int len;
+
			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
@@ -979,31 +984,14 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file,
				if (rc != 0)
					break;
			}
-			if (experimEnabled || (pTcon->ses->server &&
-			    ((pTcon->ses->server->secMode &
-			    (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
-				== 0))) {
-				struct kvec iov[2];
-				unsigned int len;
-
-				len = min((size_t)cifs_sb->wsize,
-					  write_size - total_written);
-				/* iov[0] is reserved for smb header */
-				iov[1].iov_base = (char *)write_data +
-					total_written;
-				iov[1].iov_len = len;
-				rc = CIFSSMBWrite2(xid, pTcon,
-					open_file->netfid, len,
-					*poffset, &bytes_written,
-					iov, 1, 0);
-			} else
-				rc = CIFSSMBWrite(xid, pTcon,
-					open_file->netfid,
-					min_t(const int, cifs_sb->wsize,
-					      write_size - total_written),
-					*poffset, &bytes_written,
-					write_data + total_written,
-					NULL, 0);
+
+			len = min((size_t)cifs_sb->wsize,
+				  write_size - total_written);
+			/* iov[0] is reserved for smb header */
+			iov[1].iov_base = (char *)write_data + total_written;
+			iov[1].iov_len = len;
+			rc = CIFSSMBWrite2(xid, pTcon, open_file->netfid, len,
+					   *poffset, &bytes_written, iov, 1, 0);
		}
		if (rc || (bytes_written == 0)) {
			if (total_written)
@@ -1240,12 +1228,6 @@ static int cifs_writepages(struct address_space *mapping,
	}
 
	tcon = tlink_tcon(open_file->tlink);
-	if (!experimEnabled && tcon->ses->server->secMode &
-	    (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
-		cifsFileInfo_put(open_file);
-		kfree(iov);
-		return generic_writepages(mapping, wbc);
-	}
	cifsFileInfo_put(open_file);
 
	xid = GetXid();
@@ -1569,34 +1551,6 @@ int cifs_fsync(struct file *file, int datasync)
	return rc;
 }
 
-/* static void cifs_sync_page(struct page *page)
-{
-	struct address_space *mapping;
-	struct inode *inode;
-	unsigned long index = page->index;
-	unsigned int rpages = 0;
-	int rc = 0;
-
-	cFYI(1, "sync page %p", page);
-	mapping = page->mapping;
-	if (!mapping)
-		return 0;
-	inode = mapping->host;
-	if (!inode)
-		return; */
-
-/*	fill in rpages then
-	result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */
-
-/*	cFYI(1, "rpages is %d for sync page of Index %ld", rpages, index);
-
-#if 0
-	if (rc < 0)
-		return rc;
-	return 0;
-#endif
-} */
-
 /*
 * As file closes, flush all cached write data for this inode checking
 * for write behind errors.
@@ -2008,6 +1962,24 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
	return total_read;
 }
 
+/*
+ * If the page is mmap'ed into a process' page tables, then we need to make
+ * sure that it doesn't change while being written back.
+ */
+static int
+cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct page *page = vmf->page;
+
+	lock_page(page);
+	return VM_FAULT_LOCKED;
+}
+
+static struct vm_operations_struct cifs_file_vm_ops = {
+	.fault = filemap_fault,
+	.page_mkwrite = cifs_page_mkwrite,
+};
+
 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
 {
	int rc, xid;
@@ -2019,6 +1991,8 @@ int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
		cifs_invalidate_mapping(inode);
 
	rc = generic_file_mmap(file, vma);
+	if (rc == 0)
+		vma->vm_ops = &cifs_file_vm_ops;
	FreeXid(xid);
	return rc;
 }
@@ -2035,6 +2009,8 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
		return rc;
	}
	rc = generic_file_mmap(file, vma);
+	if (rc == 0)
+		vma->vm_ops = &cifs_file_vm_ops;
	FreeXid(xid);
	return rc;
 }
@@ -2510,7 +2486,6 @@ const struct address_space_operations cifs_addr_ops = {
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
-	/* .sync_page = cifs_sync_page, */
	/* .direct_IO = */
 };
 
@@ -2528,6 +2503,5 @@ const struct address_space_operations cifs_addr_ops_smallbuf = {
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
-	/* .sync_page = cifs_sync_page, */
	/* .direct_IO = */
 };
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index e8804d373404..ce417a9764a3 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -239,7 +239,7 @@ CIFSQueryMFSymLink(const int xid, struct cifsTconInfo *tcon,
	if (rc != 0)
		return rc;
 
-	if (file_info.EndOfFile != CIFS_MF_SYMLINK_FILE_SIZE) {
+	if (file_info.EndOfFile != cpu_to_le64(CIFS_MF_SYMLINK_FILE_SIZE)) {
		CIFSSMBClose(xid, tcon, netfid);
		/* it's not a symlink */
		return -EINVAL;
@@ -316,7 +316,7 @@ CIFSCheckMFSymlink(struct cifs_fattr *fattr,
	if (rc != 0)
		goto out;
 
-	if (file_info.EndOfFile != CIFS_MF_SYMLINK_FILE_SIZE) {
+	if (file_info.EndOfFile != cpu_to_le64(CIFS_MF_SYMLINK_FILE_SIZE)) {
		CIFSSMBClose(xid, pTcon, netfid);
		/* it's not a symlink */
		goto out;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 2a930a752a78..0c684ae4c071 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -100,6 +100,7 @@ sesInfoFree(struct cifsSesInfo *buf_to_free)
100 memset(buf_to_free->password, 0, strlen(buf_to_free->password)); 100 memset(buf_to_free->password, 0, strlen(buf_to_free->password));
101 kfree(buf_to_free->password); 101 kfree(buf_to_free->password);
102 } 102 }
103 kfree(buf_to_free->user_name);
103 kfree(buf_to_free->domainName); 104 kfree(buf_to_free->domainName);
104 kfree(buf_to_free); 105 kfree(buf_to_free);
105} 106}
@@ -520,7 +521,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
520 (struct smb_com_transaction_change_notify_rsp *)buf; 521 (struct smb_com_transaction_change_notify_rsp *)buf;
521 struct file_notify_information *pnotify; 522 struct file_notify_information *pnotify;
522 __u32 data_offset = 0; 523 __u32 data_offset = 0;
523 if (pSMBr->ByteCount > sizeof(struct file_notify_information)) { 524 if (get_bcc_le(buf) > sizeof(struct file_notify_information)) {
524 data_offset = le32_to_cpu(pSMBr->DataOffset); 525 data_offset = le32_to_cpu(pSMBr->DataOffset);
525 526
526 pnotify = (struct file_notify_information *) 527 pnotify = (struct file_notify_information *)
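
Note: the first misc.c hunk plugs a leak: ses->user_name was allocated at mount time but never freed in sesInfoFree(). kfree(NULL) is a no-op, so the unconditional kfree() needs no NULL check; only the password gets an extra memset() so the secret does not linger in freed memory. The resulting teardown, consolidated:

	if (buf_to_free->password) {
		/* scrub the secret before the allocator reuses the buffer */
		memset(buf_to_free->password, 0, strlen(buf_to_free->password));
		kfree(buf_to_free->password);
	}
	kfree(buf_to_free->user_name);	/* kfree(NULL) is a no-op */
	kfree(buf_to_free->domainName);
	kfree(buf_to_free);
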
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 16765703131b..f6728eb6f4b9 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -219,12 +219,12 @@ static void unicode_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
219 bcc_ptr++; 219 bcc_ptr++;
220 } */ 220 } */
221 /* copy user */ 221 /* copy user */
222 if (ses->userName == NULL) { 222 if (ses->user_name == NULL) {
223 /* null user mount */ 223 /* null user mount */
224 *bcc_ptr = 0; 224 *bcc_ptr = 0;
225 *(bcc_ptr+1) = 0; 225 *(bcc_ptr+1) = 0;
226 } else { 226 } else {
227 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->userName, 227 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->user_name,
228 MAX_USERNAME_SIZE, nls_cp); 228 MAX_USERNAME_SIZE, nls_cp);
229 } 229 }
230 bcc_ptr += 2 * bytes_ret; 230 bcc_ptr += 2 * bytes_ret;
@@ -244,12 +244,11 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
244 /* copy user */ 244 /* copy user */
245 /* BB what about null user mounts - check that we do this BB */ 245 /* BB what about null user mounts - check that we do this BB */
246 /* copy user */ 246 /* copy user */
247 if (ses->userName == NULL) { 247 if (ses->user_name != NULL)
248 /* BB what about null user mounts - check that we do this BB */ 248 strncpy(bcc_ptr, ses->user_name, MAX_USERNAME_SIZE);
249 } else { 249 /* else null user mount */
250 strncpy(bcc_ptr, ses->userName, MAX_USERNAME_SIZE); 250
251 } 251 bcc_ptr += strnlen(ses->user_name, MAX_USERNAME_SIZE);
252 bcc_ptr += strnlen(ses->userName, MAX_USERNAME_SIZE);
253 *bcc_ptr = 0; 252 *bcc_ptr = 0;
254 bcc_ptr++; /* account for null termination */ 253 bcc_ptr++; /* account for null termination */
255 254
@@ -405,8 +404,8 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
405 /* BB spec says that if AvId field of MsvAvTimestamp is populated then 404 /* BB spec says that if AvId field of MsvAvTimestamp is populated then
406 we must set the MIC field of the AUTHENTICATE_MESSAGE */ 405 we must set the MIC field of the AUTHENTICATE_MESSAGE */
407 ses->ntlmssp->server_flags = le32_to_cpu(pblob->NegotiateFlags); 406 ses->ntlmssp->server_flags = le32_to_cpu(pblob->NegotiateFlags);
408 tioffset = cpu_to_le16(pblob->TargetInfoArray.BufferOffset); 407 tioffset = le32_to_cpu(pblob->TargetInfoArray.BufferOffset);
409 tilen = cpu_to_le16(pblob->TargetInfoArray.Length); 408 tilen = le16_to_cpu(pblob->TargetInfoArray.Length);
410 if (tilen) { 409 if (tilen) {
411 ses->auth_key.response = kmalloc(tilen, GFP_KERNEL); 410 ses->auth_key.response = kmalloc(tilen, GFP_KERNEL);
412 if (!ses->auth_key.response) { 411 if (!ses->auth_key.response) {
@@ -523,14 +522,14 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
523 tmp += len; 522 tmp += len;
524 } 523 }
525 524
526 if (ses->userName == NULL) { 525 if (ses->user_name == NULL) {
527 sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); 526 sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
528 sec_blob->UserName.Length = 0; 527 sec_blob->UserName.Length = 0;
529 sec_blob->UserName.MaximumLength = 0; 528 sec_blob->UserName.MaximumLength = 0;
530 tmp += 2; 529 tmp += 2;
531 } else { 530 } else {
532 int len; 531 int len;
533 len = cifs_strtoUCS((__le16 *)tmp, ses->userName, 532 len = cifs_strtoUCS((__le16 *)tmp, ses->user_name,
534 MAX_USERNAME_SIZE, nls_cp); 533 MAX_USERNAME_SIZE, nls_cp);
535 len *= 2; /* unicode is 2 bytes each */ 534 len *= 2; /* unicode is 2 bytes each */
536 sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); 535 sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
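
Note: besides the userName -> user_name rename, the decode_ntlmssp_challenge() hunk fixes two conversion bugs on adjacent lines: the 32-bit BufferOffset was converted with cpu_to_le16() (wrong direction and wrong width) and Length likewise in the wrong direction. Wire fields must be read with leNN_to_cpu() at their declared width. A small sketch with an NTLMSSP-style buffer descriptor; the struct layout is illustrative, not copied from the CIFS headers:

struct sec_buffer {			/* little-endian on the wire */
	__le16 Length;
	__le16 MaximumLength;
	__le32 BufferOffset;
} __packed;

static void read_sec_buffer(const struct sec_buffer *sb,
			    u32 *offset, u16 *len)
{
	*offset = le32_to_cpu(sb->BufferOffset);	/* 32-bit wire -> host */
	*len    = le16_to_cpu(sb->Length);		/* 16-bit wire -> host */
}
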
diff --git a/fs/coda/Makefile b/fs/coda/Makefile
index 6c22e61da397..1bab69a0d347 100644
--- a/fs/coda/Makefile
+++ b/fs/coda/Makefile
@@ -9,4 +9,4 @@ coda-objs := psdev.o cache.o cnode.o inode.o dir.o file.o upcall.o \
9 9
10# If you want debugging output, please uncomment the following line. 10# If you want debugging output, please uncomment the following line.
11 11
12# EXTRA_CFLAGS += -DDEBUG -DDEBUG_SMB_MALLOC=1 12# ccflags-y := -DDEBUG -DDEBUG_SMB_MALLOC=1
diff --git a/fs/coda/sysctl.c b/fs/coda/sysctl.c
index c6405ce3c50e..af56ad56a89a 100644
--- a/fs/coda/sysctl.c
+++ b/fs/coda/sysctl.c
@@ -13,7 +13,6 @@
13 13
14#ifdef CONFIG_SYSCTL 14#ifdef CONFIG_SYSCTL
15static struct ctl_table_header *fs_table_header; 15static struct ctl_table_header *fs_table_header;
16#endif
17 16
18static ctl_table coda_table[] = { 17static ctl_table coda_table[] = {
19 { 18 {
@@ -40,7 +39,6 @@ static ctl_table coda_table[] = {
40 {} 39 {}
41}; 40};
42 41
43#ifdef CONFIG_SYSCTL
44static ctl_table fs_table[] = { 42static ctl_table fs_table[] = {
45 { 43 {
46 .procname = "coda", 44 .procname = "coda",
@@ -49,22 +47,27 @@ static ctl_table fs_table[] = {
49 }, 47 },
50 {} 48 {}
51}; 49};
52#endif
53 50
54void coda_sysctl_init(void) 51void coda_sysctl_init(void)
55{ 52{
56#ifdef CONFIG_SYSCTL
57 if ( !fs_table_header ) 53 if ( !fs_table_header )
58 fs_table_header = register_sysctl_table(fs_table); 54 fs_table_header = register_sysctl_table(fs_table);
59#endif
60} 55}
61 56
62void coda_sysctl_clean(void) 57void coda_sysctl_clean(void)
63{ 58{
64#ifdef CONFIG_SYSCTL
65 if ( fs_table_header ) { 59 if ( fs_table_header ) {
66 unregister_sysctl_table(fs_table_header); 60 unregister_sysctl_table(fs_table_header);
67 fs_table_header = NULL; 61 fs_table_header = NULL;
68 } 62 }
69#endif
70} 63}
64
65#else
66void coda_sysctl_init(void)
67{
68}
69
70void coda_sysctl_clean(void)
71{
72}
73#endif
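
Note: the sysctl.c change trades three scattered #ifdef CONFIG_SYSCTL blocks for one conditional around the whole implementation plus empty stubs, so call sites never see the ifdef. The shape of the pattern, abridged:

#ifdef CONFIG_SYSCTL
void coda_sysctl_init(void)
{
	if (!fs_table_header)
		fs_table_header = register_sysctl_table(fs_table);
}
#else
void coda_sysctl_init(void) { }		/* stub keeps callers ifdef-free */
#endif
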
diff --git a/fs/compat.c b/fs/compat.c
index c6d31a3bab88..72fe6cda9108 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1671,9 +1671,6 @@ int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
1671 * Update: ERESTARTSYS breaks at least the xview clock binary, so 1671 * Update: ERESTARTSYS breaks at least the xview clock binary, so
1672 * I'm trying ERESTARTNOHAND which restart only when you want to. 1672 * I'm trying ERESTARTNOHAND which restart only when you want to.
1673 */ 1673 */
1674#define MAX_SELECT_SECONDS \
1675 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
1676
1677int compat_core_sys_select(int n, compat_ulong_t __user *inp, 1674int compat_core_sys_select(int n, compat_ulong_t __user *inp,
1678 compat_ulong_t __user *outp, compat_ulong_t __user *exp, 1675 compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1679 struct timespec *end_time) 1676 struct timespec *end_time)
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 90ff3cb10de3..3313dd19f543 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -990,7 +990,7 @@ static int configfs_dump(struct configfs_dirent *sd, int level)
990 * This describes these functions and their helpers. 990 * This describes these functions and their helpers.
991 * 991 *
992 * Allow another kernel system to depend on a config_item. If this 992 * Allow another kernel system to depend on a config_item. If this
993 * happens, the item cannot go away until the dependant can live without 993 * happens, the item cannot go away until the dependent can live without
994 * it. The idea is to give client modules as simple an interface as 994 * it. The idea is to give client modules as simple an interface as
995 * possible. When a system asks them to depend on an item, they just 995 * possible. When a system asks them to depend on an item, they just
996 * call configfs_depend_item(). If the item is live and the client 996 * call configfs_depend_item(). If the item is live and the client
diff --git a/fs/dcache.c b/fs/dcache.c
index ad25c4cec7d5..22a0ef41bad1 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -99,12 +99,9 @@ static struct kmem_cache *dentry_cache __read_mostly;
99static unsigned int d_hash_mask __read_mostly; 99static unsigned int d_hash_mask __read_mostly;
100static unsigned int d_hash_shift __read_mostly; 100static unsigned int d_hash_shift __read_mostly;
101 101
102struct dcache_hash_bucket { 102static struct hlist_bl_head *dentry_hashtable __read_mostly;
103 struct hlist_bl_head head;
104};
105static struct dcache_hash_bucket *dentry_hashtable __read_mostly;
106 103
107static inline struct dcache_hash_bucket *d_hash(struct dentry *parent, 104static inline struct hlist_bl_head *d_hash(struct dentry *parent,
108 unsigned long hash) 105 unsigned long hash)
109{ 106{
110 hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; 107 hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
@@ -112,16 +109,6 @@ static inline struct dcache_hash_bucket *d_hash(struct dentry *parent,
112 return dentry_hashtable + (hash & D_HASHMASK); 109 return dentry_hashtable + (hash & D_HASHMASK);
113} 110}
114 111
115static inline void spin_lock_bucket(struct dcache_hash_bucket *b)
116{
117 bit_spin_lock(0, (unsigned long *)&b->head.first);
118}
119
120static inline void spin_unlock_bucket(struct dcache_hash_bucket *b)
121{
122 __bit_spin_unlock(0, (unsigned long *)&b->head.first);
123}
124
125/* Statistics gathering. */ 112/* Statistics gathering. */
126struct dentry_stat_t dentry_stat = { 113struct dentry_stat_t dentry_stat = {
127 .age_limit = 45, 114 .age_limit = 45,
@@ -167,8 +154,8 @@ static void d_free(struct dentry *dentry)
167 if (dentry->d_op && dentry->d_op->d_release) 154 if (dentry->d_op && dentry->d_op->d_release)
168 dentry->d_op->d_release(dentry); 155 dentry->d_op->d_release(dentry);
169 156
170 /* if dentry was never inserted into hash, immediate free is OK */ 157 /* if dentry was never visible to RCU, immediate free is OK */
171 if (hlist_bl_unhashed(&dentry->d_hash)) 158 if (!(dentry->d_flags & DCACHE_RCUACCESS))
172 __d_free(&dentry->d_u.d_rcu); 159 __d_free(&dentry->d_u.d_rcu);
173 else 160 else
174 call_rcu(&dentry->d_u.d_rcu, __d_free); 161 call_rcu(&dentry->d_u.d_rcu, __d_free);
@@ -330,28 +317,19 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
330 */ 317 */
331void __d_drop(struct dentry *dentry) 318void __d_drop(struct dentry *dentry)
332{ 319{
333 if (!(dentry->d_flags & DCACHE_UNHASHED)) { 320 if (!d_unhashed(dentry)) {
334 if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) { 321 struct hlist_bl_head *b;
335 bit_spin_lock(0, 322 if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
336 (unsigned long *)&dentry->d_sb->s_anon.first); 323 b = &dentry->d_sb->s_anon;
337 dentry->d_flags |= DCACHE_UNHASHED; 324 else
338 hlist_bl_del_init(&dentry->d_hash);
339 __bit_spin_unlock(0,
340 (unsigned long *)&dentry->d_sb->s_anon.first);
341 } else {
342 struct dcache_hash_bucket *b;
343 b = d_hash(dentry->d_parent, dentry->d_name.hash); 325 b = d_hash(dentry->d_parent, dentry->d_name.hash);
344 spin_lock_bucket(b); 326
345 /* 327 hlist_bl_lock(b);
346 * We may not actually need to put DCACHE_UNHASHED 328 __hlist_bl_del(&dentry->d_hash);
347 * manipulations under the hash lock, but follow 329 dentry->d_hash.pprev = NULL;
348 * the principle of least surprise. 330 hlist_bl_unlock(b);
349 */ 331
350 dentry->d_flags |= DCACHE_UNHASHED; 332 dentry_rcuwalk_barrier(dentry);
351 hlist_bl_del_rcu(&dentry->d_hash);
352 spin_unlock_bucket(b);
353 dentry_rcuwalk_barrier(dentry);
354 }
355 } 333 }
356} 334}
357EXPORT_SYMBOL(__d_drop); 335EXPORT_SYMBOL(__d_drop);
@@ -1304,7 +1282,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
1304 dname[name->len] = 0; 1282 dname[name->len] = 0;
1305 1283
1306 dentry->d_count = 1; 1284 dentry->d_count = 1;
1307 dentry->d_flags = DCACHE_UNHASHED; 1285 dentry->d_flags = 0;
1308 spin_lock_init(&dentry->d_lock); 1286 spin_lock_init(&dentry->d_lock);
1309 seqcount_init(&dentry->d_seq); 1287 seqcount_init(&dentry->d_seq);
1310 dentry->d_inode = NULL; 1288 dentry->d_inode = NULL;
@@ -1606,10 +1584,9 @@ struct dentry *d_obtain_alias(struct inode *inode)
1606 tmp->d_inode = inode; 1584 tmp->d_inode = inode;
1607 tmp->d_flags |= DCACHE_DISCONNECTED; 1585 tmp->d_flags |= DCACHE_DISCONNECTED;
1608 list_add(&tmp->d_alias, &inode->i_dentry); 1586 list_add(&tmp->d_alias, &inode->i_dentry);
1609 bit_spin_lock(0, (unsigned long *)&tmp->d_sb->s_anon.first); 1587 hlist_bl_lock(&tmp->d_sb->s_anon);
1610 tmp->d_flags &= ~DCACHE_UNHASHED;
1611 hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); 1588 hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
1612 __bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first); 1589 hlist_bl_unlock(&tmp->d_sb->s_anon);
1613 spin_unlock(&tmp->d_lock); 1590 spin_unlock(&tmp->d_lock);
1614 spin_unlock(&inode->i_lock); 1591 spin_unlock(&inode->i_lock);
1615 security_d_instantiate(tmp, inode); 1592 security_d_instantiate(tmp, inode);
@@ -1789,7 +1766,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
1789 unsigned int len = name->len; 1766 unsigned int len = name->len;
1790 unsigned int hash = name->hash; 1767 unsigned int hash = name->hash;
1791 const unsigned char *str = name->name; 1768 const unsigned char *str = name->name;
1792 struct dcache_hash_bucket *b = d_hash(parent, hash); 1769 struct hlist_bl_head *b = d_hash(parent, hash);
1793 struct hlist_bl_node *node; 1770 struct hlist_bl_node *node;
1794 struct dentry *dentry; 1771 struct dentry *dentry;
1795 1772
@@ -1813,7 +1790,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
1813 * 1790 *
1814 * See Documentation/filesystems/path-lookup.txt for more details. 1791 * See Documentation/filesystems/path-lookup.txt for more details.
1815 */ 1792 */
1816 hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) { 1793 hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1817 struct inode *i; 1794 struct inode *i;
1818 const char *tname; 1795 const char *tname;
1819 int tlen; 1796 int tlen;
@@ -1908,7 +1885,7 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
1908 unsigned int len = name->len; 1885 unsigned int len = name->len;
1909 unsigned int hash = name->hash; 1886 unsigned int hash = name->hash;
1910 const unsigned char *str = name->name; 1887 const unsigned char *str = name->name;
1911 struct dcache_hash_bucket *b = d_hash(parent, hash); 1888 struct hlist_bl_head *b = d_hash(parent, hash);
1912 struct hlist_bl_node *node; 1889 struct hlist_bl_node *node;
1913 struct dentry *found = NULL; 1890 struct dentry *found = NULL;
1914 struct dentry *dentry; 1891 struct dentry *dentry;
@@ -1935,7 +1912,7 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
1935 */ 1912 */
1936 rcu_read_lock(); 1913 rcu_read_lock();
1937 1914
1938 hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) { 1915 hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1939 const char *tname; 1916 const char *tname;
1940 int tlen; 1917 int tlen;
1941 1918
@@ -2086,13 +2063,13 @@ again:
2086} 2063}
2087EXPORT_SYMBOL(d_delete); 2064EXPORT_SYMBOL(d_delete);
2088 2065
2089static void __d_rehash(struct dentry * entry, struct dcache_hash_bucket *b) 2066static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b)
2090{ 2067{
2091 BUG_ON(!d_unhashed(entry)); 2068 BUG_ON(!d_unhashed(entry));
2092 spin_lock_bucket(b); 2069 hlist_bl_lock(b);
2093 entry->d_flags &= ~DCACHE_UNHASHED; 2070 entry->d_flags |= DCACHE_RCUACCESS;
2094 hlist_bl_add_head_rcu(&entry->d_hash, &b->head); 2071 hlist_bl_add_head_rcu(&entry->d_hash, b);
2095 spin_unlock_bucket(b); 2072 hlist_bl_unlock(b);
2096} 2073}
2097 2074
2098static void _d_rehash(struct dentry * entry) 2075static void _d_rehash(struct dentry * entry)
@@ -2131,7 +2108,7 @@ EXPORT_SYMBOL(d_rehash);
2131 */ 2108 */
2132void dentry_update_name_case(struct dentry *dentry, struct qstr *name) 2109void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
2133{ 2110{
2134 BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); 2111 BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
2135 BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */ 2112 BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
2136 2113
2137 spin_lock(&dentry->d_lock); 2114 spin_lock(&dentry->d_lock);
@@ -3025,7 +3002,7 @@ static void __init dcache_init_early(void)
3025 3002
3026 dentry_hashtable = 3003 dentry_hashtable =
3027 alloc_large_system_hash("Dentry cache", 3004 alloc_large_system_hash("Dentry cache",
3028 sizeof(struct dcache_hash_bucket), 3005 sizeof(struct hlist_bl_head),
3029 dhash_entries, 3006 dhash_entries,
3030 13, 3007 13,
3031 HASH_EARLY, 3008 HASH_EARLY,
@@ -3034,7 +3011,7 @@ static void __init dcache_init_early(void)
3034 0); 3011 0);
3035 3012
3036 for (loop = 0; loop < (1 << d_hash_shift); loop++) 3013 for (loop = 0; loop < (1 << d_hash_shift); loop++)
3037 INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head); 3014 INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3038} 3015}
3039 3016
3040static void __init dcache_init(void) 3017static void __init dcache_init(void)
@@ -3057,7 +3034,7 @@ static void __init dcache_init(void)
3057 3034
3058 dentry_hashtable = 3035 dentry_hashtable =
3059 alloc_large_system_hash("Dentry cache", 3036 alloc_large_system_hash("Dentry cache",
3060 sizeof(struct dcache_hash_bucket), 3037 sizeof(struct hlist_bl_head),
3061 dhash_entries, 3038 dhash_entries,
3062 13, 3039 13,
3063 0, 3040 0,
@@ -3066,7 +3043,7 @@ static void __init dcache_init(void)
3066 0); 3043 0);
3067 3044
3068 for (loop = 0; loop < (1 << d_hash_shift); loop++) 3045 for (loop = 0; loop < (1 << d_hash_shift); loop++)
3069 INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head); 3046 INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3070} 3047}
3071 3048
3072/* SLAB cache for __getname() consumers */ 3049/* SLAB cache for __getname() consumers */
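
Note: the dcache.c conversion deletes the one-member struct dcache_hash_bucket wrapper and the private spin_lock_bucket() helpers in favour of bare struct hlist_bl_head plus the generic hlist_bl_lock()/hlist_bl_unlock(), which take a bit spinlock on bit 0 of the head's first pointer. Each hash bucket therefore stays a single pointer wide with no separate lock array. A minimal sketch of bucket use (the table size and helper name are illustrative):

#include <linux/list_bl.h>
#include <linux/rculist_bl.h>

static struct hlist_bl_head table[1 << 10];

static void bucket_add(struct hlist_bl_head *b, struct hlist_bl_node *n)
{
	hlist_bl_lock(b);		/* bit_spin_lock(0, &b->first) */
	hlist_bl_add_head_rcu(n, b);	/* readers may walk under RCU */
	hlist_bl_unlock(b);
}
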
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 1bb547c9cad6..2f27e578d466 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -479,6 +479,7 @@ int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty)
479 struct dentry *root = sb->s_root; 479 struct dentry *root = sb->s_root;
480 struct pts_fs_info *fsi = DEVPTS_SB(sb); 480 struct pts_fs_info *fsi = DEVPTS_SB(sb);
481 struct pts_mount_opts *opts = &fsi->mount_opts; 481 struct pts_mount_opts *opts = &fsi->mount_opts;
482 int ret = 0;
482 char s[12]; 483 char s[12];
483 484
484 /* We're supposed to be given the slave end of a pty */ 485 /* We're supposed to be given the slave end of a pty */
@@ -501,14 +502,17 @@ int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty)
501 mutex_lock(&root->d_inode->i_mutex); 502 mutex_lock(&root->d_inode->i_mutex);
502 503
503 dentry = d_alloc_name(root, s); 504 dentry = d_alloc_name(root, s);
504 if (!IS_ERR(dentry)) { 505 if (dentry) {
505 d_add(dentry, inode); 506 d_add(dentry, inode);
506 fsnotify_create(root->d_inode, dentry); 507 fsnotify_create(root->d_inode, dentry);
508 } else {
509 iput(inode);
510 ret = -ENOMEM;
507 } 511 }
508 512
509 mutex_unlock(&root->d_inode->i_mutex); 513 mutex_unlock(&root->d_inode->i_mutex);
510 514
511 return 0; 515 return ret;
512} 516}
513 517
514struct tty_struct *devpts_get_tty(struct inode *pts_inode, int number) 518struct tty_struct *devpts_get_tty(struct inode *pts_inode, int number)
@@ -544,17 +548,12 @@ void devpts_pty_kill(struct tty_struct *tty)
544 mutex_lock(&root->d_inode->i_mutex); 548 mutex_lock(&root->d_inode->i_mutex);
545 549
546 dentry = d_find_alias(inode); 550 dentry = d_find_alias(inode);
547 if (IS_ERR(dentry))
548 goto out;
549
550 if (dentry) {
551 inode->i_nlink--;
552 d_delete(dentry);
553 dput(dentry); /* d_alloc_name() in devpts_pty_new() */
554 }
555 551
552 inode->i_nlink--;
553 d_delete(dentry);
554 dput(dentry); /* d_alloc_name() in devpts_pty_new() */
556 dput(dentry); /* d_find_alias above */ 555 dput(dentry); /* d_find_alias above */
557out: 556
558 mutex_unlock(&root->d_inode->i_mutex); 557 mutex_unlock(&root->d_inode->i_mutex);
559} 558}
560 559
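
Note: two API-contract fixes here: d_alloc_name() reports failure with NULL, never an ERR_PTR, so the old !IS_ERR() test treated allocation failure as success, leaked the inode, and returned 0; d_find_alias() likewise returns NULL rather than an ERR_PTR. The corrected create path, consolidated:

	dentry = d_alloc_name(root, s);
	if (dentry) {
		d_add(dentry, inode);
		fsnotify_create(root->d_inode, dentry);
	} else {
		iput(inode);		/* no dentry took over this reference */
		ret = -ENOMEM;
	}
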
diff --git a/fs/direct-io.c b/fs/direct-io.c
index dcb5577cde1d..ac5f164170e3 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1110,11 +1110,8 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1110 ((rw & READ) || (dio->result == dio->size))) 1110 ((rw & READ) || (dio->result == dio->size)))
1111 ret = -EIOCBQUEUED; 1111 ret = -EIOCBQUEUED;
1112 1112
1113 if (ret != -EIOCBQUEUED) { 1113 if (ret != -EIOCBQUEUED)
1114 /* All IO is now issued, send it on its way */
1115 blk_run_address_space(inode->i_mapping);
1116 dio_await_completion(dio); 1114 dio_await_completion(dio);
1117 }
1118 1115
1119 /* 1116 /*
1120 * Sync will always be dropping the final ref and completing the 1117 * Sync will always be dropping the final ref and completing the
@@ -1176,7 +1173,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1176 struct dio *dio; 1173 struct dio *dio;
1177 1174
1178 if (rw & WRITE) 1175 if (rw & WRITE)
1179 rw = WRITE_ODIRECT_PLUG; 1176 rw = WRITE_ODIRECT;
1180 1177
1181 if (bdev) 1178 if (bdev)
1182 bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev)); 1179 bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev));
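
Note: the direct-io hunk drops the explicit blk_run_address_space() kick (and WRITE_ODIRECT_PLUG loses its _PLUG suffix) because, with the on-stack plugging merged in this same cycle, queued bios are flushed automatically when the submitting task blocks. A hedged sketch of the generic on-stack plug idiom that makes such manual kicks unnecessary; this is block-layer API, not code from this file:

	struct blk_plug plug;

	blk_start_plug(&plug);
	/* ... submit_bio() calls queue I/O on the plug ... */
	blk_finish_plug(&plug);	/* also flushed implicitly if the task sleeps */
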
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 04b8c449303f..56d6bfcc1e48 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -519,7 +519,7 @@ static void toss_rsb(struct kref *kref)
519 } 519 }
520} 520}
521 521
522/* When all references to the rsb are gone it's transfered to 522/* When all references to the rsb are gone it's transferred to
523 the tossed list for later disposal. */ 523 the tossed list for later disposal. */
524 524
525static void put_rsb(struct dlm_rsb *r) 525static void put_rsb(struct dlm_rsb *r)
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index bffa1e73b9a9..5e2c71f05e46 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -810,7 +810,7 @@ static int tcp_accept_from_sock(struct connection *con)
810 810
811 /* 811 /*
812 * Add it to the active queue in case we got data 812 * Add it to the active queue in case we got data
813 * beween processing the accept adding the socket 813 * between processing the accept adding the socket
814 * to the read_sockets list 814 * to the read_sockets list
815 */ 815 */
816 if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags)) 816 if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags))
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c
index eda43f362616..14638235f7b2 100644
--- a/fs/dlm/recover.c
+++ b/fs/dlm/recover.c
@@ -304,7 +304,7 @@ static void set_master_lkbs(struct dlm_rsb *r)
304} 304}
305 305
306/* 306/*
307 * Propogate the new master nodeid to locks 307 * Propagate the new master nodeid to locks
308 * The NEW_MASTER flag tells dlm_recover_locks() which rsb's to consider. 308 * The NEW_MASTER flag tells dlm_recover_locks() which rsb's to consider.
309 * The NEW_MASTER2 flag tells recover_lvb() and set_locks_purged() which 309 * The NEW_MASTER2 flag tells recover_lvb() and set_locks_purged() which
310 * rsb's to consider. 310 * rsb's to consider.
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 2195c213ab2f..98b77c89494c 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -8,6 +8,7 @@
8#include <linux/writeback.h> 8#include <linux/writeback.h>
9#include <linux/sysctl.h> 9#include <linux/sysctl.h>
10#include <linux/gfp.h> 10#include <linux/gfp.h>
11#include "internal.h"
11 12
12/* A global variable is a bit ugly, but it keeps the code simple */ 13/* A global variable is a bit ugly, but it keeps the code simple */
13int sysctl_drop_caches; 14int sysctl_drop_caches;
@@ -16,20 +17,23 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
16{ 17{
17 struct inode *inode, *toput_inode = NULL; 18 struct inode *inode, *toput_inode = NULL;
18 19
19 spin_lock(&inode_lock); 20 spin_lock(&inode_sb_list_lock);
20 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 21 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
21 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) 22 spin_lock(&inode->i_lock);
22 continue; 23 if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
23 if (inode->i_mapping->nrpages == 0) 24 (inode->i_mapping->nrpages == 0)) {
25 spin_unlock(&inode->i_lock);
24 continue; 26 continue;
27 }
25 __iget(inode); 28 __iget(inode);
26 spin_unlock(&inode_lock); 29 spin_unlock(&inode->i_lock);
30 spin_unlock(&inode_sb_list_lock);
27 invalidate_mapping_pages(inode->i_mapping, 0, -1); 31 invalidate_mapping_pages(inode->i_mapping, 0, -1);
28 iput(toput_inode); 32 iput(toput_inode);
29 toput_inode = inode; 33 toput_inode = inode;
30 spin_lock(&inode_lock); 34 spin_lock(&inode_sb_list_lock);
31 } 35 }
32 spin_unlock(&inode_lock); 36 spin_unlock(&inode_sb_list_lock);
33 iput(toput_inode); 37 iput(toput_inode);
34} 38}
35 39
@@ -45,7 +49,11 @@ static void drop_slab(void)
45int drop_caches_sysctl_handler(ctl_table *table, int write, 49int drop_caches_sysctl_handler(ctl_table *table, int write,
46 void __user *buffer, size_t *length, loff_t *ppos) 50 void __user *buffer, size_t *length, loff_t *ppos)
47{ 51{
48 proc_dointvec_minmax(table, write, buffer, length, ppos); 52 int ret;
53
54 ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
55 if (ret)
56 return ret;
49 if (write) { 57 if (write) {
50 if (sysctl_drop_caches & 1) 58 if (sysctl_drop_caches & 1)
51 iterate_supers(drop_pagecache_sb, NULL); 59 iterate_supers(drop_pagecache_sb, NULL);
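
Note: drop_pagecache_sb() moves from the global inode_lock to the new inode_sb_list_lock plus per-inode i_lock: list membership is guarded by the list lock, per-inode state by i_lock, and __iget() pins the inode before both locks are dropped, so it cannot be freed while invalidate_mapping_pages() runs unlocked. (The sysctl handler hunk separately stops ignoring proc_dointvec_minmax() failures.) The skeleton of the walk, consolidated from the hunk:

	struct inode *inode, *toput_inode = NULL;

	spin_lock(&inode_sb_list_lock);
	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
		spin_lock(&inode->i_lock);
		if ((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW)) ||
		    inode->i_mapping->nrpages == 0) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		__iget(inode);			/* pin before unlocking */
		spin_unlock(&inode->i_lock);
		spin_unlock(&inode_sb_list_lock);

		invalidate_mapping_pages(inode->i_mapping, 0, -1);

		iput(toput_inode);		/* drop the previous pin */
		toput_inode = inode;		/* keeps our list position valid */
		spin_lock(&inode_sb_list_lock);
	}
	spin_unlock(&inode_sb_list_lock);
	iput(toput_inode);
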
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index bfd8b680e648..b8d5c8091024 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -266,7 +266,6 @@ void ecryptfs_destroy_mount_crypt_stat(
266 &mount_crypt_stat->global_auth_tok_list, 266 &mount_crypt_stat->global_auth_tok_list,
267 mount_crypt_stat_list) { 267 mount_crypt_stat_list) {
268 list_del(&auth_tok->mount_crypt_stat_list); 268 list_del(&auth_tok->mount_crypt_stat_list);
269 mount_crypt_stat->num_global_auth_toks--;
270 if (auth_tok->global_auth_tok_key 269 if (auth_tok->global_auth_tok_key
271 && !(auth_tok->flags & ECRYPTFS_AUTH_TOK_INVALID)) 270 && !(auth_tok->flags & ECRYPTFS_AUTH_TOK_INVALID))
272 key_put(auth_tok->global_auth_tok_key); 271 key_put(auth_tok->global_auth_tok_key);
@@ -1389,6 +1388,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
1389 rc = -ENOMEM; 1388 rc = -ENOMEM;
1390 goto out; 1389 goto out;
1391 } 1390 }
1391 /* Zeroed page ensures the in-header unencrypted i_size is set to 0 */
1392 rc = ecryptfs_write_headers_virt(virt, virt_len, &size, crypt_stat, 1392 rc = ecryptfs_write_headers_virt(virt, virt_len, &size, crypt_stat,
1393 ecryptfs_dentry); 1393 ecryptfs_dentry);
1394 if (unlikely(rc)) { 1394 if (unlikely(rc)) {
@@ -1452,6 +1452,25 @@ static void set_default_header_data(struct ecryptfs_crypt_stat *crypt_stat)
1452 crypt_stat->metadata_size = ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; 1452 crypt_stat->metadata_size = ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
1453} 1453}
1454 1454
1455void ecryptfs_i_size_init(const char *page_virt, struct inode *inode)
1456{
1457 struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
1458 struct ecryptfs_crypt_stat *crypt_stat;
1459 u64 file_size;
1460
1461 crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat;
1462 mount_crypt_stat =
1463 &ecryptfs_superblock_to_private(inode->i_sb)->mount_crypt_stat;
1464 if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) {
1465 file_size = i_size_read(ecryptfs_inode_to_lower(inode));
1466 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
1467 file_size += crypt_stat->metadata_size;
1468 } else
1469 file_size = get_unaligned_be64(page_virt);
1470 i_size_write(inode, (loff_t)file_size);
1471 crypt_stat->flags |= ECRYPTFS_I_SIZE_INITIALIZED;
1472}
1473
1455/** 1474/**
1456 * ecryptfs_read_headers_virt 1475 * ecryptfs_read_headers_virt
1457 * @page_virt: The virtual address into which to read the headers 1476 * @page_virt: The virtual address into which to read the headers
@@ -1482,6 +1501,8 @@ static int ecryptfs_read_headers_virt(char *page_virt,
1482 rc = -EINVAL; 1501 rc = -EINVAL;
1483 goto out; 1502 goto out;
1484 } 1503 }
1504 if (!(crypt_stat->flags & ECRYPTFS_I_SIZE_INITIALIZED))
1505 ecryptfs_i_size_init(page_virt, ecryptfs_dentry->d_inode);
1485 offset += MAGIC_ECRYPTFS_MARKER_SIZE_BYTES; 1506 offset += MAGIC_ECRYPTFS_MARKER_SIZE_BYTES;
1486 rc = ecryptfs_process_flags(crypt_stat, (page_virt + offset), 1507 rc = ecryptfs_process_flags(crypt_stat, (page_virt + offset),
1487 &bytes_read); 1508 &bytes_read);
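
Note: ecryptfs_i_size_init() centralizes what lookup used to open-code: in the normal case the plaintext size lives in the first 8 bytes of the header as an unaligned big-endian value, while in encrypted-view mode it is derived from the lower inode instead. The core of the common path:

	u64 file_size;

	/* header bytes 0..7: plaintext i_size, big-endian, unaligned */
	file_size = get_unaligned_be64(page_virt);
	i_size_write(inode, (loff_t)file_size);
	crypt_stat->flags |= ECRYPTFS_I_SIZE_INITIALIZED;	/* only once */
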
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index e00753496e3e..e70282775e2c 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -233,7 +233,7 @@ ecryptfs_get_key_payload_data(struct key *key)
233 233
234struct ecryptfs_key_sig { 234struct ecryptfs_key_sig {
235 struct list_head crypt_stat_list; 235 struct list_head crypt_stat_list;
236 char keysig[ECRYPTFS_SIG_SIZE_HEX]; 236 char keysig[ECRYPTFS_SIG_SIZE_HEX + 1];
237}; 237};
238 238
239struct ecryptfs_filename { 239struct ecryptfs_filename {
@@ -257,19 +257,19 @@ struct ecryptfs_filename {
257struct ecryptfs_crypt_stat { 257struct ecryptfs_crypt_stat {
258#define ECRYPTFS_STRUCT_INITIALIZED 0x00000001 258#define ECRYPTFS_STRUCT_INITIALIZED 0x00000001
259#define ECRYPTFS_POLICY_APPLIED 0x00000002 259#define ECRYPTFS_POLICY_APPLIED 0x00000002
260#define ECRYPTFS_NEW_FILE 0x00000004 260#define ECRYPTFS_ENCRYPTED 0x00000004
261#define ECRYPTFS_ENCRYPTED 0x00000008 261#define ECRYPTFS_SECURITY_WARNING 0x00000008
262#define ECRYPTFS_SECURITY_WARNING 0x00000010 262#define ECRYPTFS_ENABLE_HMAC 0x00000010
263#define ECRYPTFS_ENABLE_HMAC 0x00000020 263#define ECRYPTFS_ENCRYPT_IV_PAGES 0x00000020
264#define ECRYPTFS_ENCRYPT_IV_PAGES 0x00000040 264#define ECRYPTFS_KEY_VALID 0x00000040
265#define ECRYPTFS_KEY_VALID 0x00000080 265#define ECRYPTFS_METADATA_IN_XATTR 0x00000080
266#define ECRYPTFS_METADATA_IN_XATTR 0x00000100 266#define ECRYPTFS_VIEW_AS_ENCRYPTED 0x00000100
267#define ECRYPTFS_VIEW_AS_ENCRYPTED 0x00000200 267#define ECRYPTFS_KEY_SET 0x00000200
268#define ECRYPTFS_KEY_SET 0x00000400 268#define ECRYPTFS_ENCRYPT_FILENAMES 0x00000400
269#define ECRYPTFS_ENCRYPT_FILENAMES 0x00000800 269#define ECRYPTFS_ENCFN_USE_MOUNT_FNEK 0x00000800
270#define ECRYPTFS_ENCFN_USE_MOUNT_FNEK 0x00001000 270#define ECRYPTFS_ENCFN_USE_FEK 0x00001000
271#define ECRYPTFS_ENCFN_USE_FEK 0x00002000 271#define ECRYPTFS_UNLINK_SIGS 0x00002000
272#define ECRYPTFS_UNLINK_SIGS 0x00004000 272#define ECRYPTFS_I_SIZE_INITIALIZED 0x00004000
273 u32 flags; 273 u32 flags;
274 unsigned int file_version; 274 unsigned int file_version;
275 size_t iv_bytes; 275 size_t iv_bytes;
@@ -296,8 +296,9 @@ struct ecryptfs_crypt_stat {
296struct ecryptfs_inode_info { 296struct ecryptfs_inode_info {
297 struct inode vfs_inode; 297 struct inode vfs_inode;
298 struct inode *wii_inode; 298 struct inode *wii_inode;
299 struct file *lower_file;
300 struct mutex lower_file_mutex; 299 struct mutex lower_file_mutex;
300 atomic_t lower_file_count;
301 struct file *lower_file;
301 struct ecryptfs_crypt_stat crypt_stat; 302 struct ecryptfs_crypt_stat crypt_stat;
302}; 303};
303 304
@@ -333,7 +334,6 @@ struct ecryptfs_global_auth_tok {
333 u32 flags; 334 u32 flags;
334 struct list_head mount_crypt_stat_list; 335 struct list_head mount_crypt_stat_list;
335 struct key *global_auth_tok_key; 336 struct key *global_auth_tok_key;
336 struct ecryptfs_auth_tok *global_auth_tok;
337 unsigned char sig[ECRYPTFS_SIG_SIZE_HEX + 1]; 337 unsigned char sig[ECRYPTFS_SIG_SIZE_HEX + 1];
338}; 338};
339 339
@@ -380,7 +380,6 @@ struct ecryptfs_mount_crypt_stat {
380 u32 flags; 380 u32 flags;
381 struct list_head global_auth_tok_list; 381 struct list_head global_auth_tok_list;
382 struct mutex global_auth_tok_list_mutex; 382 struct mutex global_auth_tok_list_mutex;
383 size_t num_global_auth_toks;
384 size_t global_default_cipher_key_size; 383 size_t global_default_cipher_key_size;
385 size_t global_default_fn_cipher_key_bytes; 384 size_t global_default_fn_cipher_key_bytes;
386 unsigned char global_default_cipher_name[ECRYPTFS_MAX_CIPHER_NAME_SIZE 385 unsigned char global_default_cipher_name[ECRYPTFS_MAX_CIPHER_NAME_SIZE
@@ -630,6 +629,7 @@ struct ecryptfs_open_req {
630int ecryptfs_interpose(struct dentry *hidden_dentry, 629int ecryptfs_interpose(struct dentry *hidden_dentry,
631 struct dentry *this_dentry, struct super_block *sb, 630 struct dentry *this_dentry, struct super_block *sb,
632 u32 flags); 631 u32 flags);
632void ecryptfs_i_size_init(const char *page_virt, struct inode *inode);
633int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, 633int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
634 struct dentry *lower_dentry, 634 struct dentry *lower_dentry,
635 struct inode *ecryptfs_dir_inode); 635 struct inode *ecryptfs_dir_inode);
@@ -761,7 +761,8 @@ int ecryptfs_privileged_open(struct file **lower_file,
761 struct dentry *lower_dentry, 761 struct dentry *lower_dentry,
762 struct vfsmount *lower_mnt, 762 struct vfsmount *lower_mnt,
763 const struct cred *cred); 763 const struct cred *cred);
764int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry); 764int ecryptfs_get_lower_file(struct dentry *ecryptfs_dentry);
765void ecryptfs_put_lower_file(struct inode *inode);
765int 766int
766ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes, 767ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
767 size_t *packet_size, 768 size_t *packet_size,
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 7d1050e254f9..566e5472f78c 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -191,10 +191,10 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
191 | ECRYPTFS_ENCRYPTED); 191 | ECRYPTFS_ENCRYPTED);
192 } 192 }
193 mutex_unlock(&crypt_stat->cs_mutex); 193 mutex_unlock(&crypt_stat->cs_mutex);
194 rc = ecryptfs_init_persistent_file(ecryptfs_dentry); 194 rc = ecryptfs_get_lower_file(ecryptfs_dentry);
195 if (rc) { 195 if (rc) {
196 printk(KERN_ERR "%s: Error attempting to initialize " 196 printk(KERN_ERR "%s: Error attempting to initialize "
197 "the persistent file for the dentry with name " 197 "the lower file for the dentry with name "
198 "[%s]; rc = [%d]\n", __func__, 198 "[%s]; rc = [%d]\n", __func__,
199 ecryptfs_dentry->d_name.name, rc); 199 ecryptfs_dentry->d_name.name, rc);
200 goto out_free; 200 goto out_free;
@@ -202,9 +202,9 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
202 if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_ACCMODE) 202 if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_ACCMODE)
203 == O_RDONLY && (file->f_flags & O_ACCMODE) != O_RDONLY) { 203 == O_RDONLY && (file->f_flags & O_ACCMODE) != O_RDONLY) {
204 rc = -EPERM; 204 rc = -EPERM;
205 printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs " 205 printk(KERN_WARNING "%s: Lower file is RO; eCryptfs "
206 "file must hence be opened RO\n", __func__); 206 "file must hence be opened RO\n", __func__);
207 goto out_free; 207 goto out_put;
208 } 208 }
209 ecryptfs_set_file_lower( 209 ecryptfs_set_file_lower(
210 file, ecryptfs_inode_to_private(inode)->lower_file); 210 file, ecryptfs_inode_to_private(inode)->lower_file);
@@ -232,10 +232,11 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
232 "Plaintext passthrough mode is not " 232 "Plaintext passthrough mode is not "
233 "enabled; returning -EIO\n"); 233 "enabled; returning -EIO\n");
234 mutex_unlock(&crypt_stat->cs_mutex); 234 mutex_unlock(&crypt_stat->cs_mutex);
235 goto out_free; 235 goto out_put;
236 } 236 }
237 rc = 0; 237 rc = 0;
238 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); 238 crypt_stat->flags &= ~(ECRYPTFS_I_SIZE_INITIALIZED
239 | ECRYPTFS_ENCRYPTED);
239 mutex_unlock(&crypt_stat->cs_mutex); 240 mutex_unlock(&crypt_stat->cs_mutex);
240 goto out; 241 goto out;
241 } 242 }
@@ -245,6 +246,8 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
245 "[0x%.16lx] size: [0x%.16llx]\n", inode, inode->i_ino, 246 "[0x%.16lx] size: [0x%.16llx]\n", inode, inode->i_ino,
246 (unsigned long long)i_size_read(inode)); 247 (unsigned long long)i_size_read(inode));
247 goto out; 248 goto out;
249out_put:
250 ecryptfs_put_lower_file(inode);
248out_free: 251out_free:
249 kmem_cache_free(ecryptfs_file_info_cache, 252 kmem_cache_free(ecryptfs_file_info_cache,
250 ecryptfs_file_to_private(file)); 253 ecryptfs_file_to_private(file));
@@ -254,17 +257,13 @@ out:
254 257
255static int ecryptfs_flush(struct file *file, fl_owner_t td) 258static int ecryptfs_flush(struct file *file, fl_owner_t td)
256{ 259{
257 int rc = 0; 260 return file->f_mode & FMODE_WRITE
258 struct file *lower_file = NULL; 261 ? filemap_write_and_wait(file->f_mapping) : 0;
259
260 lower_file = ecryptfs_file_to_lower(file);
261 if (lower_file->f_op && lower_file->f_op->flush)
262 rc = lower_file->f_op->flush(lower_file, td);
263 return rc;
264} 262}
265 263
266static int ecryptfs_release(struct inode *inode, struct file *file) 264static int ecryptfs_release(struct inode *inode, struct file *file)
267{ 265{
266 ecryptfs_put_lower_file(inode);
268 kmem_cache_free(ecryptfs_file_info_cache, 267 kmem_cache_free(ecryptfs_file_info_cache,
269 ecryptfs_file_to_private(file)); 268 ecryptfs_file_to_private(file));
270 return 0; 269 return 0;
@@ -273,7 +272,14 @@ static int ecryptfs_release(struct inode *inode, struct file *file)
273static int 272static int
274ecryptfs_fsync(struct file *file, int datasync) 273ecryptfs_fsync(struct file *file, int datasync)
275{ 274{
276 return vfs_fsync(ecryptfs_file_to_lower(file), datasync); 275 int rc = 0;
276
277 rc = generic_file_fsync(file, datasync);
278 if (rc)
279 goto out;
280 rc = vfs_fsync(ecryptfs_file_to_lower(file), datasync);
281out:
282 return rc;
277} 283}
278 284
279static int ecryptfs_fasync(int fd, struct file *file, int flag) 285static int ecryptfs_fasync(int fd, struct file *file, int flag)
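
Note: file.c now brackets every use of the lower file with ecryptfs_get_lower_file()/ecryptfs_put_lower_file() instead of one "persistent" open living as long as the inode; the lower_file_count/lower_file_mutex pair added in the header hunk above backs a first-get-opens, last-put-closes discipline. A sketch of that shape with illustrative names (lazy_lower and its open callback are not eCryptfs symbols):

struct lazy_lower {
	struct mutex	lock;
	atomic_t	count;
	struct file	*filp;
};

static int lazy_lower_get(struct lazy_lower *ll, struct file *(*open_fn)(void))
{
	int rc = 0;

	mutex_lock(&ll->lock);
	if (atomic_inc_return(&ll->count) == 1) {
		ll->filp = open_fn();		/* first user opens */
		if (IS_ERR(ll->filp)) {
			rc = PTR_ERR(ll->filp);
			ll->filp = NULL;
			atomic_set(&ll->count, 0);
		}
	}
	mutex_unlock(&ll->lock);
	return rc;
}

static void lazy_lower_put(struct lazy_lower *ll)
{
	mutex_lock(&ll->lock);
	if (atomic_dec_and_test(&ll->count)) {
		fput(ll->filp);			/* last user closes */
		ll->filp = NULL;
	}
	mutex_unlock(&ll->lock);
}
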
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index b592938a84bc..4d4cc6a90cd5 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -143,26 +143,6 @@ out:
143} 143}
144 144
145/** 145/**
146 * grow_file
147 * @ecryptfs_dentry: the eCryptfs dentry
148 *
149 * This is the code which will grow the file to its correct size.
150 */
151static int grow_file(struct dentry *ecryptfs_dentry)
152{
153 struct inode *ecryptfs_inode = ecryptfs_dentry->d_inode;
154 char zero_virt[] = { 0x00 };
155 int rc = 0;
156
157 rc = ecryptfs_write(ecryptfs_inode, zero_virt, 0, 1);
158 i_size_write(ecryptfs_inode, 0);
159 rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode);
160 ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat.flags |=
161 ECRYPTFS_NEW_FILE;
162 return rc;
163}
164
165/**
166 * ecryptfs_initialize_file 146 * ecryptfs_initialize_file
167 * 147 *
168 * Cause the file to be changed from a basic empty file to an ecryptfs 148 * Cause the file to be changed from a basic empty file to an ecryptfs
@@ -181,7 +161,6 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
181 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); 161 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
182 goto out; 162 goto out;
183 } 163 }
184 crypt_stat->flags |= ECRYPTFS_NEW_FILE;
185 ecryptfs_printk(KERN_DEBUG, "Initializing crypto context\n"); 164 ecryptfs_printk(KERN_DEBUG, "Initializing crypto context\n");
186 rc = ecryptfs_new_file_context(ecryptfs_dentry); 165 rc = ecryptfs_new_file_context(ecryptfs_dentry);
187 if (rc) { 166 if (rc) {
@@ -189,22 +168,18 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
189 "context; rc = [%d]\n", rc); 168 "context; rc = [%d]\n", rc);
190 goto out; 169 goto out;
191 } 170 }
192 rc = ecryptfs_init_persistent_file(ecryptfs_dentry); 171 rc = ecryptfs_get_lower_file(ecryptfs_dentry);
193 if (rc) { 172 if (rc) {
194 printk(KERN_ERR "%s: Error attempting to initialize " 173 printk(KERN_ERR "%s: Error attempting to initialize "
195 "the persistent file for the dentry with name " 174 "the lower file for the dentry with name "
196 "[%s]; rc = [%d]\n", __func__, 175 "[%s]; rc = [%d]\n", __func__,
197 ecryptfs_dentry->d_name.name, rc); 176 ecryptfs_dentry->d_name.name, rc);
198 goto out; 177 goto out;
199 } 178 }
200 rc = ecryptfs_write_metadata(ecryptfs_dentry); 179 rc = ecryptfs_write_metadata(ecryptfs_dentry);
201 if (rc) {
202 printk(KERN_ERR "Error writing headers; rc = [%d]\n", rc);
203 goto out;
204 }
205 rc = grow_file(ecryptfs_dentry);
206 if (rc) 180 if (rc)
207 printk(KERN_ERR "Error growing file; rc = [%d]\n", rc); 181 printk(KERN_ERR "Error writing headers; rc = [%d]\n", rc);
182 ecryptfs_put_lower_file(ecryptfs_dentry->d_inode);
208out: 183out:
209 return rc; 184 return rc;
210} 185}
@@ -250,11 +225,9 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
250 struct dentry *lower_dir_dentry; 225 struct dentry *lower_dir_dentry;
251 struct vfsmount *lower_mnt; 226 struct vfsmount *lower_mnt;
252 struct inode *lower_inode; 227 struct inode *lower_inode;
253 struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
254 struct ecryptfs_crypt_stat *crypt_stat; 228 struct ecryptfs_crypt_stat *crypt_stat;
255 char *page_virt = NULL; 229 char *page_virt = NULL;
256 u64 file_size; 230 int put_lower = 0, rc = 0;
257 int rc = 0;
258 231
259 lower_dir_dentry = lower_dentry->d_parent; 232 lower_dir_dentry = lower_dentry->d_parent;
260 lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt( 233 lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(
@@ -301,14 +274,15 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
301 rc = -ENOMEM; 274 rc = -ENOMEM;
302 goto out; 275 goto out;
303 } 276 }
304 rc = ecryptfs_init_persistent_file(ecryptfs_dentry); 277 rc = ecryptfs_get_lower_file(ecryptfs_dentry);
305 if (rc) { 278 if (rc) {
306 printk(KERN_ERR "%s: Error attempting to initialize " 279 printk(KERN_ERR "%s: Error attempting to initialize "
307 "the persistent file for the dentry with name " 280 "the lower file for the dentry with name "
308 "[%s]; rc = [%d]\n", __func__, 281 "[%s]; rc = [%d]\n", __func__,
309 ecryptfs_dentry->d_name.name, rc); 282 ecryptfs_dentry->d_name.name, rc);
310 goto out_free_kmem; 283 goto out_free_kmem;
311 } 284 }
285 put_lower = 1;
312 crypt_stat = &ecryptfs_inode_to_private( 286 crypt_stat = &ecryptfs_inode_to_private(
313 ecryptfs_dentry->d_inode)->crypt_stat; 287 ecryptfs_dentry->d_inode)->crypt_stat;
314 /* TODO: lock for crypt_stat comparison */ 288 /* TODO: lock for crypt_stat comparison */
@@ -326,18 +300,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
326 } 300 }
327 crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR; 301 crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR;
328 } 302 }
329 mount_crypt_stat = &ecryptfs_superblock_to_private( 303 ecryptfs_i_size_init(page_virt, ecryptfs_dentry->d_inode);
330 ecryptfs_dentry->d_sb)->mount_crypt_stat;
331 if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) {
332 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
333 file_size = (crypt_stat->metadata_size
334 + i_size_read(lower_dentry->d_inode));
335 else
336 file_size = i_size_read(lower_dentry->d_inode);
337 } else {
338 file_size = get_unaligned_be64(page_virt);
339 }
340 i_size_write(ecryptfs_dentry->d_inode, (loff_t)file_size);
341out_free_kmem: 304out_free_kmem:
342 kmem_cache_free(ecryptfs_header_cache_2, page_virt); 305 kmem_cache_free(ecryptfs_header_cache_2, page_virt);
343 goto out; 306 goto out;
@@ -346,6 +309,8 @@ out_put:
346 mntput(lower_mnt); 309 mntput(lower_mnt);
347 d_drop(ecryptfs_dentry); 310 d_drop(ecryptfs_dentry);
348out: 311out:
312 if (put_lower)
313 ecryptfs_put_lower_file(ecryptfs_dentry->d_inode);
349 return rc; 314 return rc;
350} 315}
351 316
@@ -562,8 +527,6 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
562 dget(lower_dentry); 527 dget(lower_dentry);
563 rc = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry); 528 rc = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry);
564 dput(lower_dentry); 529 dput(lower_dentry);
565 if (!rc)
566 d_delete(lower_dentry);
567 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); 530 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
568 dir->i_nlink = lower_dir_dentry->d_inode->i_nlink; 531 dir->i_nlink = lower_dir_dentry->d_inode->i_nlink;
569 unlock_dir(lower_dir_dentry); 532 unlock_dir(lower_dir_dentry);
@@ -634,8 +597,8 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
634 fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode); 597 fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
635out_lock: 598out_lock:
636 unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); 599 unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
637 dput(lower_new_dentry->d_parent); 600 dput(lower_new_dir_dentry);
638 dput(lower_old_dentry->d_parent); 601 dput(lower_old_dir_dentry);
639 dput(lower_new_dentry); 602 dput(lower_new_dentry);
640 dput(lower_old_dentry); 603 dput(lower_old_dentry);
641 return rc; 604 return rc;
@@ -783,8 +746,11 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
783 746
784 if (unlikely((ia->ia_size == i_size))) { 747 if (unlikely((ia->ia_size == i_size))) {
785 lower_ia->ia_valid &= ~ATTR_SIZE; 748 lower_ia->ia_valid &= ~ATTR_SIZE;
786 goto out; 749 return 0;
787 } 750 }
751 rc = ecryptfs_get_lower_file(dentry);
752 if (rc)
753 return rc;
788 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; 754 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
789 /* Switch on growing or shrinking file */ 755 /* Switch on growing or shrinking file */
790 if (ia->ia_size > i_size) { 756 if (ia->ia_size > i_size) {
@@ -862,6 +828,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
862 lower_ia->ia_valid &= ~ATTR_SIZE; 828 lower_ia->ia_valid &= ~ATTR_SIZE;
863 } 829 }
864out: 830out:
831 ecryptfs_put_lower_file(inode);
865 return rc; 832 return rc;
866} 833}
867 834
@@ -937,7 +904,13 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
937 904
938 mount_crypt_stat = &ecryptfs_superblock_to_private( 905 mount_crypt_stat = &ecryptfs_superblock_to_private(
939 dentry->d_sb)->mount_crypt_stat; 906 dentry->d_sb)->mount_crypt_stat;
907 rc = ecryptfs_get_lower_file(dentry);
908 if (rc) {
909 mutex_unlock(&crypt_stat->cs_mutex);
910 goto out;
911 }
940 rc = ecryptfs_read_metadata(dentry); 912 rc = ecryptfs_read_metadata(dentry);
913 ecryptfs_put_lower_file(inode);
941 if (rc) { 914 if (rc) {
942 if (!(mount_crypt_stat->flags 915 if (!(mount_crypt_stat->flags
943 & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) { 916 & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) {
@@ -951,10 +924,17 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
951 goto out; 924 goto out;
952 } 925 }
953 rc = 0; 926 rc = 0;
954 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); 927 crypt_stat->flags &= ~(ECRYPTFS_I_SIZE_INITIALIZED
928 | ECRYPTFS_ENCRYPTED);
955 } 929 }
956 } 930 }
957 mutex_unlock(&crypt_stat->cs_mutex); 931 mutex_unlock(&crypt_stat->cs_mutex);
932 if (S_ISREG(inode->i_mode)) {
933 rc = filemap_write_and_wait(inode->i_mapping);
934 if (rc)
935 goto out;
936 fsstack_copy_attr_all(inode, lower_inode);
937 }
958 memcpy(&lower_ia, ia, sizeof(lower_ia)); 938 memcpy(&lower_ia, ia, sizeof(lower_ia));
959 if (ia->ia_valid & ATTR_FILE) 939 if (ia->ia_valid & ATTR_FILE)
960 lower_ia.ia_file = ecryptfs_file_to_lower(ia->ia_file); 940 lower_ia.ia_file = ecryptfs_file_to_lower(ia->ia_file);
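
Note: the ecryptfs_rename() hunk fixes a reference imbalance: the parent references are taken with dget_parent(), but the old code dropped ->d_parent at the end, and d_parent may have changed by then (the rename itself can move the dentry). The puts must name the same dentries that lock_rename() was given. Abridged sketch of the balanced pattern; the dget_parent() calls come from the surrounding function, not from this hunk:

	lower_old_dir_dentry = dget_parent(lower_old_dentry);
	lower_new_dir_dentry = dget_parent(lower_new_dentry);
	trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);

	/* ... vfs_rename() on the lower filesystem ... */

	unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
	dput(lower_new_dir_dentry);	/* pairs with dget_parent() above */
	dput(lower_old_dir_dentry);	/* not ->d_parent, which may have moved */
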
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index c1436cff6f2d..03e609c45012 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -65,6 +65,24 @@ static int process_request_key_err(long err_code)
65 return rc; 65 return rc;
66} 66}
67 67
68static int process_find_global_auth_tok_for_sig_err(int err_code)
69{
70 int rc = err_code;
71
72 switch (err_code) {
73 case -ENOENT:
74 ecryptfs_printk(KERN_WARNING, "Missing auth tok\n");
75 break;
76 case -EINVAL:
77 ecryptfs_printk(KERN_WARNING, "Invalid auth tok\n");
78 break;
79 default:
80 rc = process_request_key_err(err_code);
81 break;
82 }
83 return rc;
84}
85
68/** 86/**
69 * ecryptfs_parse_packet_length 87 * ecryptfs_parse_packet_length
70 * @data: Pointer to memory containing length at offset 88 * @data: Pointer to memory containing length at offset
@@ -403,27 +421,120 @@ out:
403 return rc; 421 return rc;
404} 422}
405 423
424/**
425 * ecryptfs_verify_version
426 * @version: The version number to confirm
427 *
428 * Returns zero on good version; non-zero otherwise
429 */
430static int ecryptfs_verify_version(u16 version)
431{
432 int rc = 0;
433 unsigned char major;
434 unsigned char minor;
435
436 major = ((version >> 8) & 0xFF);
437 minor = (version & 0xFF);
438 if (major != ECRYPTFS_VERSION_MAJOR) {
439 ecryptfs_printk(KERN_ERR, "Major version number mismatch. "
440 "Expected [%d]; got [%d]\n",
441 ECRYPTFS_VERSION_MAJOR, major);
442 rc = -EINVAL;
443 goto out;
444 }
445 if (minor != ECRYPTFS_VERSION_MINOR) {
446 ecryptfs_printk(KERN_ERR, "Minor version number mismatch. "
447 "Expected [%d]; got [%d]\n",
448 ECRYPTFS_VERSION_MINOR, minor);
449 rc = -EINVAL;
450 goto out;
451 }
452out:
453 return rc;
454}
455
456/**
457 * ecryptfs_verify_auth_tok_from_key
458 * @auth_tok_key: key containing the authentication token
459 * @auth_tok: authentication token
460 *
461 * Returns zero on valid auth tok; -EINVAL otherwise
462 */
463static int
464ecryptfs_verify_auth_tok_from_key(struct key *auth_tok_key,
465 struct ecryptfs_auth_tok **auth_tok)
466{
467 int rc = 0;
468
469 (*auth_tok) = ecryptfs_get_key_payload_data(auth_tok_key);
470 if (ecryptfs_verify_version((*auth_tok)->version)) {
471 printk(KERN_ERR "Data structure version mismatch. Userspace "
472 "tools must match eCryptfs kernel module with major "
473 "version [%d] and minor version [%d]\n",
474 ECRYPTFS_VERSION_MAJOR, ECRYPTFS_VERSION_MINOR);
475 rc = -EINVAL;
476 goto out;
477 }
478 if ((*auth_tok)->token_type != ECRYPTFS_PASSWORD
479 && (*auth_tok)->token_type != ECRYPTFS_PRIVATE_KEY) {
480 printk(KERN_ERR "Invalid auth_tok structure "
481 "returned from key query\n");
482 rc = -EINVAL;
483 goto out;
484 }
485out:
486 return rc;
487}
488
406static int 489static int
407ecryptfs_find_global_auth_tok_for_sig( 490ecryptfs_find_global_auth_tok_for_sig(
408 struct ecryptfs_global_auth_tok **global_auth_tok, 491 struct key **auth_tok_key,
492 struct ecryptfs_auth_tok **auth_tok,
409 struct ecryptfs_mount_crypt_stat *mount_crypt_stat, char *sig) 493 struct ecryptfs_mount_crypt_stat *mount_crypt_stat, char *sig)
410{ 494{
411 struct ecryptfs_global_auth_tok *walker; 495 struct ecryptfs_global_auth_tok *walker;
412 int rc = 0; 496 int rc = 0;
413 497
414 (*global_auth_tok) = NULL; 498 (*auth_tok_key) = NULL;
499 (*auth_tok) = NULL;
415 mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex); 500 mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex);
416 list_for_each_entry(walker, 501 list_for_each_entry(walker,
417 &mount_crypt_stat->global_auth_tok_list, 502 &mount_crypt_stat->global_auth_tok_list,
418 mount_crypt_stat_list) { 503 mount_crypt_stat_list) {
419 if (memcmp(walker->sig, sig, ECRYPTFS_SIG_SIZE_HEX) == 0) { 504 if (memcmp(walker->sig, sig, ECRYPTFS_SIG_SIZE_HEX))
420 rc = key_validate(walker->global_auth_tok_key); 505 continue;
421 if (!rc) 506
422 (*global_auth_tok) = walker; 507 if (walker->flags & ECRYPTFS_AUTH_TOK_INVALID) {
508 rc = -EINVAL;
423 goto out; 509 goto out;
424 } 510 }
511
512 rc = key_validate(walker->global_auth_tok_key);
513 if (rc) {
514 if (rc == -EKEYEXPIRED)
515 goto out;
516 goto out_invalid_auth_tok;
517 }
518
519 down_write(&(walker->global_auth_tok_key->sem));
520 rc = ecryptfs_verify_auth_tok_from_key(
521 walker->global_auth_tok_key, auth_tok);
522 if (rc)
523 goto out_invalid_auth_tok_unlock;
524
525 (*auth_tok_key) = walker->global_auth_tok_key;
526 key_get(*auth_tok_key);
527 goto out;
425 } 528 }
426 rc = -EINVAL; 529 rc = -ENOENT;
530 goto out;
531out_invalid_auth_tok_unlock:
532 up_write(&(walker->global_auth_tok_key->sem));
533out_invalid_auth_tok:
534 printk(KERN_WARNING "Invalidating auth tok with sig = [%s]\n", sig);
535 walker->flags |= ECRYPTFS_AUTH_TOK_INVALID;
536 key_put(walker->global_auth_tok_key);
537 walker->global_auth_tok_key = NULL;
427out: 538out:
428 mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex); 539 mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex);
429 return rc; 540 return rc;
@@ -451,14 +562,11 @@ ecryptfs_find_auth_tok_for_sig(
451 struct ecryptfs_mount_crypt_stat *mount_crypt_stat, 562 struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
452 char *sig) 563 char *sig)
453{ 564{
454 struct ecryptfs_global_auth_tok *global_auth_tok;
455 int rc = 0; 565 int rc = 0;
456 566
457 (*auth_tok_key) = NULL; 567 rc = ecryptfs_find_global_auth_tok_for_sig(auth_tok_key, auth_tok,
458 (*auth_tok) = NULL; 568 mount_crypt_stat, sig);
459 if (ecryptfs_find_global_auth_tok_for_sig(&global_auth_tok, 569 if (rc == -ENOENT) {
460 mount_crypt_stat, sig)) {
461
462 /* if the flag ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY is set in the 570 /* if the flag ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY is set in the
463 * mount_crypt_stat structure, we prevent to use auth toks that 571 * mount_crypt_stat structure, we prevent to use auth toks that
464 * are not inserted through the ecryptfs_add_global_auth_tok 572 * are not inserted through the ecryptfs_add_global_auth_tok
@@ -470,8 +578,7 @@ ecryptfs_find_auth_tok_for_sig(
470 578
471 rc = ecryptfs_keyring_auth_tok_for_sig(auth_tok_key, auth_tok, 579 rc = ecryptfs_keyring_auth_tok_for_sig(auth_tok_key, auth_tok,
472 sig); 580 sig);
473 } else 581 }
474 (*auth_tok) = global_auth_tok->global_auth_tok;
475 return rc; 582 return rc;
476} 583}
477 584
@@ -531,6 +638,16 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
531 } 638 }
532 s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; 639 s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
533 (*packet_size) = 0; 640 (*packet_size) = 0;
641 rc = ecryptfs_find_auth_tok_for_sig(
642 &auth_tok_key,
643 &s->auth_tok, mount_crypt_stat,
644 mount_crypt_stat->global_default_fnek_sig);
645 if (rc) {
646 printk(KERN_ERR "%s: Error attempting to find auth tok for "
647 "fnek sig [%s]; rc = [%d]\n", __func__,
648 mount_crypt_stat->global_default_fnek_sig, rc);
649 goto out;
650 }
534 rc = ecryptfs_get_tfm_and_mutex_for_cipher_name( 651 rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(
535 &s->desc.tfm, 652 &s->desc.tfm,
536 &s->tfm_mutex, mount_crypt_stat->global_default_fn_cipher_name); 653 &s->tfm_mutex, mount_crypt_stat->global_default_fn_cipher_name);
@@ -616,16 +733,6 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
616 goto out_free_unlock; 733 goto out_free_unlock;
617 } 734 }
618 dest[s->i++] = s->cipher_code; 735 dest[s->i++] = s->cipher_code;
619 rc = ecryptfs_find_auth_tok_for_sig(
620 &auth_tok_key,
621 &s->auth_tok, mount_crypt_stat,
622 mount_crypt_stat->global_default_fnek_sig);
623 if (rc) {
624 printk(KERN_ERR "%s: Error attempting to find auth tok for "
625 "fnek sig [%s]; rc = [%d]\n", __func__,
626 mount_crypt_stat->global_default_fnek_sig, rc);
627 goto out_free_unlock;
628 }
629 /* TODO: Support other key modules than passphrase for 736 /* TODO: Support other key modules than passphrase for
630 * filename encryption */ 737 * filename encryption */
631 if (s->auth_tok->token_type != ECRYPTFS_PASSWORD) { 738 if (s->auth_tok->token_type != ECRYPTFS_PASSWORD) {
@@ -765,8 +872,10 @@ out_free_unlock:
765out_unlock: 872out_unlock:
766 mutex_unlock(s->tfm_mutex); 873 mutex_unlock(s->tfm_mutex);
767out: 874out:
768 if (auth_tok_key) 875 if (auth_tok_key) {
876 up_write(&(auth_tok_key->sem));
769 key_put(auth_tok_key); 877 key_put(auth_tok_key);
878 }
770 kfree(s); 879 kfree(s);
771 return rc; 880 return rc;
772} 881}
@@ -879,6 +988,15 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
879 __func__, s->cipher_code); 988 __func__, s->cipher_code);
880 goto out; 989 goto out;
881 } 990 }
991 rc = ecryptfs_find_auth_tok_for_sig(&auth_tok_key,
992 &s->auth_tok, mount_crypt_stat,
993 s->fnek_sig_hex);
994 if (rc) {
995 printk(KERN_ERR "%s: Error attempting to find auth tok for "
996 "fnek sig [%s]; rc = [%d]\n", __func__, s->fnek_sig_hex,
997 rc);
998 goto out;
999 }
882 rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&s->desc.tfm, 1000 rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&s->desc.tfm,
883 &s->tfm_mutex, 1001 &s->tfm_mutex,
884 s->cipher_string); 1002 s->cipher_string);
@@ -925,15 +1043,6 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
925 * >= ECRYPTFS_MAX_IV_BYTES. */ 1043 * >= ECRYPTFS_MAX_IV_BYTES. */
926 memset(s->iv, 0, ECRYPTFS_MAX_IV_BYTES); 1044 memset(s->iv, 0, ECRYPTFS_MAX_IV_BYTES);
927 s->desc.info = s->iv; 1045 s->desc.info = s->iv;
928 rc = ecryptfs_find_auth_tok_for_sig(&auth_tok_key,
929 &s->auth_tok, mount_crypt_stat,
930 s->fnek_sig_hex);
931 if (rc) {
932 printk(KERN_ERR "%s: Error attempting to find auth tok for "
933 "fnek sig [%s]; rc = [%d]\n", __func__, s->fnek_sig_hex,
934 rc);
935 goto out_free_unlock;
936 }
937 /* TODO: Support other key modules than passphrase for 1046 /* TODO: Support other key modules than passphrase for
938 * filename encryption */ 1047 * filename encryption */
939 if (s->auth_tok->token_type != ECRYPTFS_PASSWORD) { 1048 if (s->auth_tok->token_type != ECRYPTFS_PASSWORD) {
@@ -1002,8 +1111,10 @@ out:
1002 (*filename_size) = 0; 1111 (*filename_size) = 0;
1003 (*filename) = NULL; 1112 (*filename) = NULL;
1004 } 1113 }
1005 if (auth_tok_key) 1114 if (auth_tok_key) {
1115 up_write(&(auth_tok_key->sem));
1006 key_put(auth_tok_key); 1116 key_put(auth_tok_key);
1117 }
1007 kfree(s); 1118 kfree(s);
1008 return rc; 1119 return rc;
1009} 1120}
@@ -1520,38 +1631,6 @@ out:
1520 return rc; 1631 return rc;
1521} 1632}
1522 1633
1523/**
1524 * ecryptfs_verify_version
1525 * @version: The version number to confirm
1526 *
1527 * Returns zero on good version; non-zero otherwise
1528 */
1529static int ecryptfs_verify_version(u16 version)
1530{
1531 int rc = 0;
1532 unsigned char major;
1533 unsigned char minor;
1534
1535 major = ((version >> 8) & 0xFF);
1536 minor = (version & 0xFF);
1537 if (major != ECRYPTFS_VERSION_MAJOR) {
1538 ecryptfs_printk(KERN_ERR, "Major version number mismatch. "
1539 "Expected [%d]; got [%d]\n",
1540 ECRYPTFS_VERSION_MAJOR, major);
1541 rc = -EINVAL;
1542 goto out;
1543 }
1544 if (minor != ECRYPTFS_VERSION_MINOR) {
1545 ecryptfs_printk(KERN_ERR, "Minor version number mismatch. "
1546 "Expected [%d]; got [%d]\n",
1547 ECRYPTFS_VERSION_MINOR, minor);
1548 rc = -EINVAL;
1549 goto out;
1550 }
1551out:
1552 return rc;
1553}
1554
1555int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key, 1634int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
1556 struct ecryptfs_auth_tok **auth_tok, 1635 struct ecryptfs_auth_tok **auth_tok,
1557 char *sig) 1636 char *sig)
@@ -1563,31 +1642,16 @@ int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
1563 printk(KERN_ERR "Could not find key with description: [%s]\n", 1642 printk(KERN_ERR "Could not find key with description: [%s]\n",
1564 sig); 1643 sig);
1565 rc = process_request_key_err(PTR_ERR(*auth_tok_key)); 1644 rc = process_request_key_err(PTR_ERR(*auth_tok_key));
1645 (*auth_tok_key) = NULL;
1566 goto out; 1646 goto out;
1567 } 1647 }
1568 (*auth_tok) = ecryptfs_get_key_payload_data(*auth_tok_key); 1648 down_write(&(*auth_tok_key)->sem);
1569 if (ecryptfs_verify_version((*auth_tok)->version)) { 1649 rc = ecryptfs_verify_auth_tok_from_key(*auth_tok_key, auth_tok);
1570 printk(KERN_ERR
1571 "Data structure version mismatch. "
1572 "Userspace tools must match eCryptfs "
1573 "kernel module with major version [%d] "
1574 "and minor version [%d]\n",
1575 ECRYPTFS_VERSION_MAJOR,
1576 ECRYPTFS_VERSION_MINOR);
1577 rc = -EINVAL;
1578 goto out_release_key;
1579 }
1580 if ((*auth_tok)->token_type != ECRYPTFS_PASSWORD
1581 && (*auth_tok)->token_type != ECRYPTFS_PRIVATE_KEY) {
1582 printk(KERN_ERR "Invalid auth_tok structure "
1583 "returned from key query\n");
1584 rc = -EINVAL;
1585 goto out_release_key;
1586 }
1587out_release_key:
1588 if (rc) { 1650 if (rc) {
1651 up_write(&(*auth_tok_key)->sem);
1589 key_put(*auth_tok_key); 1652 key_put(*auth_tok_key);
1590 (*auth_tok_key) = NULL; 1653 (*auth_tok_key) = NULL;
1654 goto out;
1591 } 1655 }
1592out: 1656out:
1593 return rc; 1657 return rc;
@@ -1809,6 +1873,7 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
1809find_next_matching_auth_tok: 1873find_next_matching_auth_tok:
1810 found_auth_tok = 0; 1874 found_auth_tok = 0;
1811 if (auth_tok_key) { 1875 if (auth_tok_key) {
1876 up_write(&(auth_tok_key->sem));
1812 key_put(auth_tok_key); 1877 key_put(auth_tok_key);
1813 auth_tok_key = NULL; 1878 auth_tok_key = NULL;
1814 } 1879 }
@@ -1895,8 +1960,10 @@ found_matching_auth_tok:
1895out_wipe_list: 1960out_wipe_list:
1896 wipe_auth_tok_list(&auth_tok_list); 1961 wipe_auth_tok_list(&auth_tok_list);
1897out: 1962out:
1898 if (auth_tok_key) 1963 if (auth_tok_key) {
1964 up_write(&(auth_tok_key->sem));
1899 key_put(auth_tok_key); 1965 key_put(auth_tok_key);
1966 }
1900 return rc; 1967 return rc;
1901} 1968}
1902 1969
@@ -2324,7 +2391,7 @@ ecryptfs_generate_key_packet_set(char *dest_base,
2324 size_t max) 2391 size_t max)
2325{ 2392{
2326 struct ecryptfs_auth_tok *auth_tok; 2393 struct ecryptfs_auth_tok *auth_tok;
2327 struct ecryptfs_global_auth_tok *global_auth_tok; 2394 struct key *auth_tok_key = NULL;
2328 struct ecryptfs_mount_crypt_stat *mount_crypt_stat = 2395 struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
2329 &ecryptfs_superblock_to_private( 2396 &ecryptfs_superblock_to_private(
2330 ecryptfs_dentry->d_sb)->mount_crypt_stat; 2397 ecryptfs_dentry->d_sb)->mount_crypt_stat;
@@ -2343,21 +2410,16 @@ ecryptfs_generate_key_packet_set(char *dest_base,
2343 list_for_each_entry(key_sig, &crypt_stat->keysig_list, 2410 list_for_each_entry(key_sig, &crypt_stat->keysig_list,
2344 crypt_stat_list) { 2411 crypt_stat_list) {
2345 memset(key_rec, 0, sizeof(*key_rec)); 2412 memset(key_rec, 0, sizeof(*key_rec));
2346 rc = ecryptfs_find_global_auth_tok_for_sig(&global_auth_tok, 2413 rc = ecryptfs_find_global_auth_tok_for_sig(&auth_tok_key,
2414 &auth_tok,
2347 mount_crypt_stat, 2415 mount_crypt_stat,
2348 key_sig->keysig); 2416 key_sig->keysig);
2349 if (rc) { 2417 if (rc) {
2350 printk(KERN_ERR "Error attempting to get the global " 2418 printk(KERN_WARNING "Unable to retrieve auth tok with "
2351 "auth_tok; rc = [%d]\n", rc); 2419 "sig = [%s]\n", key_sig->keysig);
2420 rc = process_find_global_auth_tok_for_sig_err(rc);
2352 goto out_free; 2421 goto out_free;
2353 } 2422 }
2354 if (global_auth_tok->flags & ECRYPTFS_AUTH_TOK_INVALID) {
2355 printk(KERN_WARNING
2356 "Skipping invalid auth tok with sig = [%s]\n",
2357 global_auth_tok->sig);
2358 continue;
2359 }
2360 auth_tok = global_auth_tok->global_auth_tok;
2361 if (auth_tok->token_type == ECRYPTFS_PASSWORD) { 2423 if (auth_tok->token_type == ECRYPTFS_PASSWORD) {
2362 rc = write_tag_3_packet((dest_base + (*len)), 2424 rc = write_tag_3_packet((dest_base + (*len)),
2363 &max, auth_tok, 2425 &max, auth_tok,
@@ -2395,6 +2457,9 @@ ecryptfs_generate_key_packet_set(char *dest_base,
2395 rc = -EINVAL; 2457 rc = -EINVAL;
2396 goto out_free; 2458 goto out_free;
2397 } 2459 }
2460 up_write(&(auth_tok_key->sem));
2461 key_put(auth_tok_key);
2462 auth_tok_key = NULL;
2398 } 2463 }
2399 if (likely(max > 0)) { 2464 if (likely(max > 0)) {
2400 dest_base[(*len)] = 0x00; 2465 dest_base[(*len)] = 0x00;
@@ -2407,6 +2472,11 @@ out_free:
2407out: 2472out:
2408 if (rc) 2473 if (rc)
2409 (*len) = 0; 2474 (*len) = 0;
2475 if (auth_tok_key) {
2476 up_write(&(auth_tok_key->sem));
2477 key_put(auth_tok_key);
2478 }
2479
2410 mutex_unlock(&crypt_stat->keysig_list_mutex); 2480 mutex_unlock(&crypt_stat->keysig_list_mutex);
2411 return rc; 2481 return rc;
2412} 2482}
@@ -2424,6 +2494,7 @@ int ecryptfs_add_keysig(struct ecryptfs_crypt_stat *crypt_stat, char *sig)
2424 return -ENOMEM; 2494 return -ENOMEM;
2425 } 2495 }
2426 memcpy(new_key_sig->keysig, sig, ECRYPTFS_SIG_SIZE_HEX); 2496 memcpy(new_key_sig->keysig, sig, ECRYPTFS_SIG_SIZE_HEX);
2497 new_key_sig->keysig[ECRYPTFS_SIG_SIZE_HEX] = '\0';
2427 /* Caller must hold keysig_list_mutex */ 2498 /* Caller must hold keysig_list_mutex */
2428 list_add(&new_key_sig->crypt_stat_list, &crypt_stat->keysig_list); 2499 list_add(&new_key_sig->crypt_stat_list, &crypt_stat->keysig_list);
2429 2500
@@ -2453,7 +2524,6 @@ ecryptfs_add_global_auth_tok(struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
2453 mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex); 2524 mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex);
2454 list_add(&new_auth_tok->mount_crypt_stat_list, 2525 list_add(&new_auth_tok->mount_crypt_stat_list,
2455 &mount_crypt_stat->global_auth_tok_list); 2526 &mount_crypt_stat->global_auth_tok_list);
2456 mount_crypt_stat->num_global_auth_toks++;
2457 mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex); 2527 mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex);
2458out: 2528out:
2459 return rc; 2529 return rc;
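
The keystore changes above establish a locking contract: any helper that hands back an auth_tok now also hands back the backing key with its rw_semaphore held for write, and the caller is responsible for releasing both. A minimal caller-side sketch, assuming the eCryptfs declarations visible in this file (not the literal kernel code):

static int example_use_auth_tok(struct ecryptfs_mount_crypt_stat *mcs,
                                char *sig)
{
        struct key *auth_tok_key = NULL;
        struct ecryptfs_auth_tok *auth_tok;
        int rc;

        rc = ecryptfs_find_auth_tok_for_sig(&auth_tok_key, &auth_tok,
                                            mcs, sig);
        if (rc)
                return rc;              /* on error the key is not held */
        /* ... use auth_tok; auth_tok_key->sem is held for write here ... */
        up_write(&auth_tok_key->sem);   /* pairs with down_write() in helper */
        key_put(auth_tok_key);          /* pairs with request_key() reference */
        return 0;
}
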
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
index 0851ab6980f5..69f994a7d524 100644
--- a/fs/ecryptfs/kthread.c
+++ b/fs/ecryptfs/kthread.c
@@ -44,7 +44,7 @@ static struct task_struct *ecryptfs_kthread;
44 * @ignored: ignored 44 * @ignored: ignored
45 * 45 *
46 * The eCryptfs kernel thread that has the responsibility of getting 46 * The eCryptfs kernel thread that has the responsibility of getting
47 * the lower persistent file with RW permissions. 47 * the lower file with RW permissions.
48 * 48 *
49 * Returns zero on success; non-zero otherwise 49 * Returns zero on success; non-zero otherwise
50 */ 50 */
@@ -141,8 +141,8 @@ int ecryptfs_privileged_open(struct file **lower_file,
141 int rc = 0; 141 int rc = 0;
142 142
143 /* Corresponding dput() and mntput() are done when the 143 /* Corresponding dput() and mntput() are done when the
144 * persistent file is fput() when the eCryptfs inode is 144 * lower file is fput() when all eCryptfs files for the inode are
145 * destroyed. */ 145 * released. */
146 dget(lower_dentry); 146 dget(lower_dentry);
147 mntget(lower_mnt); 147 mntget(lower_mnt);
148 flags |= IS_RDONLY(lower_dentry->d_inode) ? O_RDONLY : O_RDWR; 148 flags |= IS_RDONLY(lower_dentry->d_inode) ? O_RDONLY : O_RDWR;
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 758323a0f09a..89b93389af8e 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -96,7 +96,7 @@ void __ecryptfs_printk(const char *fmt, ...)
96} 96}
97 97
98/** 98/**
99 * ecryptfs_init_persistent_file 99 * ecryptfs_init_lower_file
100 * @ecryptfs_dentry: Fully initialized eCryptfs dentry object, with 100 * @ecryptfs_dentry: Fully initialized eCryptfs dentry object, with
101 * the lower dentry and the lower mount set 101 * the lower dentry and the lower mount set
102 * 102 *
@@ -104,44 +104,70 @@ void __ecryptfs_printk(const char *fmt, ...)
104 * inode. All I/O operations to the lower inode occur through that 104 * inode. All I/O operations to the lower inode occur through that
105 * file. When the first eCryptfs dentry that interposes with the first 105 * file. When the first eCryptfs dentry that interposes with the first
106 * lower dentry for that inode is created, this function creates the 106 * lower dentry for that inode is created, this function creates the
107 * persistent file struct and associates it with the eCryptfs 107 * lower file struct and associates it with the eCryptfs
108 * inode. When the eCryptfs inode is destroyed, the file is closed. 108 * inode. When all eCryptfs files associated with the inode are released, the
109 * file is closed.
109 * 110 *
110 * The persistent file will be opened with read/write permissions, if 111 * The lower file will be opened with read/write permissions, if
111 * possible. Otherwise, it is opened read-only. 112 * possible. Otherwise, it is opened read-only.
112 * 113 *
113 * This function does nothing if a lower persistent file is already 114 * This function does nothing if a lower file is already
114 * associated with the eCryptfs inode. 115 * associated with the eCryptfs inode.
115 * 116 *
116 * Returns zero on success; non-zero otherwise 117 * Returns zero on success; non-zero otherwise
117 */ 118 */
118int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry) 119static int ecryptfs_init_lower_file(struct dentry *dentry,
120 struct file **lower_file)
119{ 121{
120 const struct cred *cred = current_cred(); 122 const struct cred *cred = current_cred();
123 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
124 struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
125 int rc;
126
127 rc = ecryptfs_privileged_open(lower_file, lower_dentry, lower_mnt,
128 cred);
129 if (rc) {
130 printk(KERN_ERR "Error opening lower file "
131 "for lower_dentry [0x%p] and lower_mnt [0x%p]; "
132 "rc = [%d]\n", lower_dentry, lower_mnt, rc);
133 (*lower_file) = NULL;
134 }
135 return rc;
136}
137
138int ecryptfs_get_lower_file(struct dentry *dentry)
139{
121 struct ecryptfs_inode_info *inode_info = 140 struct ecryptfs_inode_info *inode_info =
122 ecryptfs_inode_to_private(ecryptfs_dentry->d_inode); 141 ecryptfs_inode_to_private(dentry->d_inode);
123 int rc = 0; 142 int count, rc = 0;
124 143
125 mutex_lock(&inode_info->lower_file_mutex); 144 mutex_lock(&inode_info->lower_file_mutex);
126 if (!inode_info->lower_file) { 145 count = atomic_inc_return(&inode_info->lower_file_count);
127 struct dentry *lower_dentry; 146 if (WARN_ON_ONCE(count < 1))
128 struct vfsmount *lower_mnt = 147 rc = -EINVAL;
129 ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry); 148 else if (count == 1) {
130 149 rc = ecryptfs_init_lower_file(dentry,
131 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); 150 &inode_info->lower_file);
132 rc = ecryptfs_privileged_open(&inode_info->lower_file, 151 if (rc)
133 lower_dentry, lower_mnt, cred); 152 atomic_set(&inode_info->lower_file_count, 0);
134 if (rc) {
135 printk(KERN_ERR "Error opening lower persistent file "
136 "for lower_dentry [0x%p] and lower_mnt [0x%p]; "
137 "rc = [%d]\n", lower_dentry, lower_mnt, rc);
138 inode_info->lower_file = NULL;
139 }
140 } 153 }
141 mutex_unlock(&inode_info->lower_file_mutex); 154 mutex_unlock(&inode_info->lower_file_mutex);
142 return rc; 155 return rc;
143} 156}
144 157
158void ecryptfs_put_lower_file(struct inode *inode)
159{
160 struct ecryptfs_inode_info *inode_info;
161
162 inode_info = ecryptfs_inode_to_private(inode);
163 if (atomic_dec_and_mutex_lock(&inode_info->lower_file_count,
164 &inode_info->lower_file_mutex)) {
165 fput(inode_info->lower_file);
166 inode_info->lower_file = NULL;
167 mutex_unlock(&inode_info->lower_file_mutex);
168 }
169}
170
145static struct inode *ecryptfs_get_inode(struct inode *lower_inode, 171static struct inode *ecryptfs_get_inode(struct inode *lower_inode,
146 struct super_block *sb) 172 struct super_block *sb)
147{ 173{
@@ -241,14 +267,14 @@ static int ecryptfs_init_global_auth_toks(
241 struct ecryptfs_mount_crypt_stat *mount_crypt_stat) 267 struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
242{ 268{
243 struct ecryptfs_global_auth_tok *global_auth_tok; 269 struct ecryptfs_global_auth_tok *global_auth_tok;
270 struct ecryptfs_auth_tok *auth_tok;
244 int rc = 0; 271 int rc = 0;
245 272
246 list_for_each_entry(global_auth_tok, 273 list_for_each_entry(global_auth_tok,
247 &mount_crypt_stat->global_auth_tok_list, 274 &mount_crypt_stat->global_auth_tok_list,
248 mount_crypt_stat_list) { 275 mount_crypt_stat_list) {
249 rc = ecryptfs_keyring_auth_tok_for_sig( 276 rc = ecryptfs_keyring_auth_tok_for_sig(
250 &global_auth_tok->global_auth_tok_key, 277 &global_auth_tok->global_auth_tok_key, &auth_tok,
251 &global_auth_tok->global_auth_tok,
252 global_auth_tok->sig); 278 global_auth_tok->sig);
253 if (rc) { 279 if (rc) {
254 printk(KERN_ERR "Could not find valid key in user " 280 printk(KERN_ERR "Could not find valid key in user "
@@ -256,8 +282,10 @@ static int ecryptfs_init_global_auth_toks(
256 "option: [%s]\n", global_auth_tok->sig); 282 "option: [%s]\n", global_auth_tok->sig);
257 global_auth_tok->flags |= ECRYPTFS_AUTH_TOK_INVALID; 283 global_auth_tok->flags |= ECRYPTFS_AUTH_TOK_INVALID;
258 goto out; 284 goto out;
259 } else 285 } else {
260 global_auth_tok->flags &= ~ECRYPTFS_AUTH_TOK_INVALID; 286 global_auth_tok->flags &= ~ECRYPTFS_AUTH_TOK_INVALID;
287 up_write(&(global_auth_tok->global_auth_tok_key)->sem);
288 }
261 } 289 }
262out: 290out:
263 return rc; 291 return rc;
@@ -276,7 +304,7 @@ static void ecryptfs_init_mount_crypt_stat(
276/** 304/**
277 * ecryptfs_parse_options 305 * ecryptfs_parse_options
278 * @sb: The ecryptfs super block 306 * @sb: The ecryptfs super block
279 * @options: The options pased to the kernel 307 * @options: The options passed to the kernel
280 * 308 *
281 * Parse mount options: 309 * Parse mount options:
282 * debug=N - ecryptfs_verbosity level for debug output 310 * debug=N - ecryptfs_verbosity level for debug output
@@ -840,7 +868,7 @@ static int __init ecryptfs_init(void)
840 } 868 }
841 rc = ecryptfs_init_messaging(); 869 rc = ecryptfs_init_messaging();
842 if (rc) { 870 if (rc) {
843 printk(KERN_ERR "Failure occured while attempting to " 871 printk(KERN_ERR "Failure occurred while attempting to "
844 "initialize the communications channel to " 872 "initialize the communications channel to "
845 "ecryptfsd\n"); 873 "ecryptfsd\n");
846 goto out_destroy_kthread; 874 goto out_destroy_kthread;
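
The get/put pair above replaces the open-once, close-at-inode-destroy model with plain reference counting on the lower file: the first ecryptfs_get_lower_file() opens it, the last ecryptfs_put_lower_file() fput()s it. A hedged sketch of how an open/release path would pair the two calls (the surrounding function names are illustrative):

static int example_open(struct inode *inode, struct file *file)
{
        int rc;

        rc = ecryptfs_get_lower_file(file->f_path.dentry);
        if (rc)
                return rc;      /* the count was rolled back on failure */
        /* ... if any later setup step fails, undo with:
         *      ecryptfs_put_lower_file(inode);                      */
        return 0;
}

static int example_release(struct inode *inode, struct file *file)
{
        ecryptfs_put_lower_file(inode); /* final put fput()s the lower file */
        return 0;
}
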
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index cc64fca89f8d..6a44148c5fb9 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -62,6 +62,18 @@ static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc)
62{ 62{
63 int rc; 63 int rc;
64 64
65 /*
66 * Refuse to write the page out if we are called from reclaim context
67 * since our writepage() path may allocate memory when calling into
68 * the lower fs vfs_write(), which may in turn invoke us
69 * again.
70 */
71 if (current->flags & PF_MEMALLOC) {
72 redirty_page_for_writepage(wbc, page);
73 rc = 0;
74 goto out;
75 }
76
65 rc = ecryptfs_encrypt_page(page); 77 rc = ecryptfs_encrypt_page(page);
66 if (rc) { 78 if (rc) {
67 ecryptfs_printk(KERN_WARNING, "Error encrypting " 79 ecryptfs_printk(KERN_WARNING, "Error encrypting "
@@ -70,8 +82,8 @@ static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc)
70 goto out; 82 goto out;
71 } 83 }
72 SetPageUptodate(page); 84 SetPageUptodate(page);
73 unlock_page(page);
74out: 85out:
86 unlock_page(page);
75 return rc; 87 return rc;
76} 88}
77 89
@@ -193,11 +205,7 @@ static int ecryptfs_readpage(struct file *file, struct page *page)
193 &ecryptfs_inode_to_private(page->mapping->host)->crypt_stat; 205 &ecryptfs_inode_to_private(page->mapping->host)->crypt_stat;
194 int rc = 0; 206 int rc = 0;
195 207
196 if (!crypt_stat 208 if (!crypt_stat || !(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
197 || !(crypt_stat->flags & ECRYPTFS_ENCRYPTED)
198 || (crypt_stat->flags & ECRYPTFS_NEW_FILE)) {
199 ecryptfs_printk(KERN_DEBUG,
200 "Passing through unencrypted page\n");
201 rc = ecryptfs_read_lower_page_segment(page, page->index, 0, 209 rc = ecryptfs_read_lower_page_segment(page, page->index, 0,
202 PAGE_CACHE_SIZE, 210 PAGE_CACHE_SIZE,
203 page->mapping->host); 211 page->mapping->host);
@@ -295,8 +303,7 @@ static int ecryptfs_write_begin(struct file *file,
295 struct ecryptfs_crypt_stat *crypt_stat = 303 struct ecryptfs_crypt_stat *crypt_stat =
296 &ecryptfs_inode_to_private(mapping->host)->crypt_stat; 304 &ecryptfs_inode_to_private(mapping->host)->crypt_stat;
297 305
298 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED) 306 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
299 || (crypt_stat->flags & ECRYPTFS_NEW_FILE)) {
300 rc = ecryptfs_read_lower_page_segment( 307 rc = ecryptfs_read_lower_page_segment(
301 page, index, 0, PAGE_CACHE_SIZE, mapping->host); 308 page, index, 0, PAGE_CACHE_SIZE, mapping->host);
302 if (rc) { 309 if (rc) {
@@ -374,6 +381,11 @@ static int ecryptfs_write_begin(struct file *file,
374 && (pos != 0)) 381 && (pos != 0))
375 zero_user(page, 0, PAGE_CACHE_SIZE); 382 zero_user(page, 0, PAGE_CACHE_SIZE);
376out: 383out:
384 if (unlikely(rc)) {
385 unlock_page(page);
386 page_cache_release(page);
387 *pagep = NULL;
388 }
377 return rc; 389 return rc;
378} 390}
379 391
@@ -486,13 +498,8 @@ static int ecryptfs_write_end(struct file *file,
486 struct ecryptfs_crypt_stat *crypt_stat = 498 struct ecryptfs_crypt_stat *crypt_stat =
487 &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; 499 &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
488 int rc; 500 int rc;
501 int need_unlock_page = 1;
489 502
490 if (crypt_stat->flags & ECRYPTFS_NEW_FILE) {
491 ecryptfs_printk(KERN_DEBUG, "ECRYPTFS_NEW_FILE flag set in "
492 "crypt_stat at memory location [%p]\n", crypt_stat);
493 crypt_stat->flags &= ~(ECRYPTFS_NEW_FILE);
494 } else
495 ecryptfs_printk(KERN_DEBUG, "Not a new file\n");
496 ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page" 503 ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page"
497 "(page w/ index = [0x%.16lx], to = [%d])\n", index, to); 504 "(page w/ index = [0x%.16lx], to = [%d])\n", index, to);
498 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { 505 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
@@ -512,26 +519,26 @@ static int ecryptfs_write_end(struct file *file,
512 "zeros in page with index = [0x%.16lx]\n", index); 519 "zeros in page with index = [0x%.16lx]\n", index);
513 goto out; 520 goto out;
514 } 521 }
515 rc = ecryptfs_encrypt_page(page); 522 set_page_dirty(page);
516 if (rc) { 523 unlock_page(page);
517 ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper " 524 need_unlock_page = 0;
518 "index [0x%.16lx])\n", index);
519 goto out;
520 }
521 if (pos + copied > i_size_read(ecryptfs_inode)) { 525 if (pos + copied > i_size_read(ecryptfs_inode)) {
522 i_size_write(ecryptfs_inode, pos + copied); 526 i_size_write(ecryptfs_inode, pos + copied);
523 ecryptfs_printk(KERN_DEBUG, "Expanded file size to " 527 ecryptfs_printk(KERN_DEBUG, "Expanded file size to "
524 "[0x%.16llx]\n", 528 "[0x%.16llx]\n",
525 (unsigned long long)i_size_read(ecryptfs_inode)); 529 (unsigned long long)i_size_read(ecryptfs_inode));
530 balance_dirty_pages_ratelimited(mapping);
531 rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode);
532 if (rc) {
533 printk(KERN_ERR "Error writing inode size to metadata; "
534 "rc = [%d]\n", rc);
535 goto out;
536 }
526 } 537 }
527 rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode); 538 rc = copied;
528 if (rc)
529 printk(KERN_ERR "Error writing inode size to metadata; "
530 "rc = [%d]\n", rc);
531 else
532 rc = copied;
533out: 539out:
534 unlock_page(page); 540 if (need_unlock_page)
541 unlock_page(page);
535 page_cache_release(page); 542 page_cache_release(page);
536 return rc; 543 return rc;
537} 544}
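
The PF_MEMALLOC check added to ecryptfs_writepage() above is a general pattern for stacked filesystems whose writeback path can itself allocate memory. A standalone sketch of the guard, with the encrypt-and-write step elided:

static int stacked_writepage(struct page *page, struct writeback_control *wbc)
{
        if (current->flags & PF_MEMALLOC) {
                /* Called from memory reclaim: writing out would allocate
                 * memory in the lower fs and could recurse into reclaim,
                 * so just redirty the page and try again later. */
                redirty_page_for_writepage(wbc, page);
                unlock_page(page);
                return 0;
        }
        /* ... encrypt the page and write it to the lower filesystem ... */
        SetPageUptodate(page);
        unlock_page(page);
        return 0;
}
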
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index db184ef15d3d..85d430963116 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -44,15 +44,11 @@ int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data,
44 ssize_t rc; 44 ssize_t rc;
45 45
46 inode_info = ecryptfs_inode_to_private(ecryptfs_inode); 46 inode_info = ecryptfs_inode_to_private(ecryptfs_inode);
47 mutex_lock(&inode_info->lower_file_mutex);
48 BUG_ON(!inode_info->lower_file); 47 BUG_ON(!inode_info->lower_file);
49 inode_info->lower_file->f_pos = offset;
50 fs_save = get_fs(); 48 fs_save = get_fs();
51 set_fs(get_ds()); 49 set_fs(get_ds());
52 rc = vfs_write(inode_info->lower_file, data, size, 50 rc = vfs_write(inode_info->lower_file, data, size, &offset);
53 &inode_info->lower_file->f_pos);
54 set_fs(fs_save); 51 set_fs(fs_save);
55 mutex_unlock(&inode_info->lower_file_mutex);
56 mark_inode_dirty_sync(ecryptfs_inode); 52 mark_inode_dirty_sync(ecryptfs_inode);
57 return rc; 53 return rc;
58} 54}
@@ -234,15 +230,11 @@ int ecryptfs_read_lower(char *data, loff_t offset, size_t size,
234 mm_segment_t fs_save; 230 mm_segment_t fs_save;
235 ssize_t rc; 231 ssize_t rc;
236 232
237 mutex_lock(&inode_info->lower_file_mutex);
238 BUG_ON(!inode_info->lower_file); 233 BUG_ON(!inode_info->lower_file);
239 inode_info->lower_file->f_pos = offset;
240 fs_save = get_fs(); 234 fs_save = get_fs();
241 set_fs(get_ds()); 235 set_fs(get_ds());
242 rc = vfs_read(inode_info->lower_file, data, size, 236 rc = vfs_read(inode_info->lower_file, data, size, &offset);
243 &inode_info->lower_file->f_pos);
244 set_fs(fs_save); 237 set_fs(fs_save);
245 mutex_unlock(&inode_info->lower_file_mutex);
246 return rc; 238 return rc;
247} 239}
248 240
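
Dropping lower_file_mutex in the two helpers above is safe because the offset is now passed by value: vfs_read() and vfs_write() advance a private loff_t copy rather than the shared lower_file->f_pos. A sketch of the positional-I/O idiom (era-appropriate set_fs() usage; a kernel buffer is assumed):

static ssize_t read_lower_at(struct file *lower_file, char *data,
                             size_t size, loff_t offset)
{
        mm_segment_t fs_save = get_fs();
        ssize_t rc;

        set_fs(get_ds());       /* data is a kernel pointer */
        rc = vfs_read(lower_file, (char __user *)data, size, &offset);
        set_fs(fs_save);        /* offset was a private copy; no f_pos races */
        return rc;
}
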
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 3042fe123a34..245b517bf1b6 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -56,6 +56,7 @@ static struct inode *ecryptfs_alloc_inode(struct super_block *sb)
56 goto out; 56 goto out;
57 ecryptfs_init_crypt_stat(&inode_info->crypt_stat); 57 ecryptfs_init_crypt_stat(&inode_info->crypt_stat);
58 mutex_init(&inode_info->lower_file_mutex); 58 mutex_init(&inode_info->lower_file_mutex);
59 atomic_set(&inode_info->lower_file_count, 0);
59 inode_info->lower_file = NULL; 60 inode_info->lower_file = NULL;
60 inode = &inode_info->vfs_inode; 61 inode = &inode_info->vfs_inode;
61out: 62out:
@@ -78,8 +79,7 @@ static void ecryptfs_i_callback(struct rcu_head *head)
78 * 79 *
79 * This is used during the final destruction of the inode. All 80 * This is used during the final destruction of the inode. All
80 * allocation of memory related to the inode, including allocated 81 * allocation of memory related to the inode, including allocated
81 * memory in the crypt_stat struct, will be released here. This 82 * memory in the crypt_stat struct, will be released here.
82 * function also fput()'s the persistent file for the lower inode.
83 * There should be no chance that this deallocation will be missed. 83 * There should be no chance that this deallocation will be missed.
84 */ 84 */
85static void ecryptfs_destroy_inode(struct inode *inode) 85static void ecryptfs_destroy_inode(struct inode *inode)
@@ -87,16 +87,7 @@ static void ecryptfs_destroy_inode(struct inode *inode)
87 struct ecryptfs_inode_info *inode_info; 87 struct ecryptfs_inode_info *inode_info;
88 88
89 inode_info = ecryptfs_inode_to_private(inode); 89 inode_info = ecryptfs_inode_to_private(inode);
90 if (inode_info->lower_file) { 90 BUG_ON(inode_info->lower_file);
91 struct dentry *lower_dentry =
92 inode_info->lower_file->f_dentry;
93
94 BUG_ON(!lower_dentry);
95 if (lower_dentry->d_inode) {
96 fput(inode_info->lower_file);
97 inode_info->lower_file = NULL;
98 }
99 }
100 ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat); 91 ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat);
101 call_rcu(&inode->i_rcu, ecryptfs_i_callback); 92 call_rcu(&inode->i_rcu, ecryptfs_i_callback);
102} 93}
@@ -198,7 +189,7 @@ static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt)
198const struct super_operations ecryptfs_sops = { 189const struct super_operations ecryptfs_sops = {
199 .alloc_inode = ecryptfs_alloc_inode, 190 .alloc_inode = ecryptfs_alloc_inode,
200 .destroy_inode = ecryptfs_destroy_inode, 191 .destroy_inode = ecryptfs_destroy_inode,
201 .drop_inode = generic_delete_inode, 192 .drop_inode = generic_drop_inode,
202 .statfs = ecryptfs_statfs, 193 .statfs = ecryptfs_statfs,
203 .remount_fs = NULL, 194 .remount_fs = NULL,
204 .evict_inode = ecryptfs_evict_inode, 195 .evict_inode = ecryptfs_evict_inode,
diff --git a/fs/efs/inode.c b/fs/efs/inode.c
index a8e7797b9477..9c13412e6c99 100644
--- a/fs/efs/inode.c
+++ b/fs/efs/inode.c
@@ -23,7 +23,6 @@ static sector_t _efs_bmap(struct address_space *mapping, sector_t block)
23} 23}
24static const struct address_space_operations efs_aops = { 24static const struct address_space_operations efs_aops = {
25 .readpage = efs_readpage, 25 .readpage = efs_readpage,
26 .sync_page = block_sync_page,
27 .bmap = _efs_bmap 26 .bmap = _efs_bmap
28}; 27};
29 28
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index ff12f7ac73ef..f9cfd168fbe2 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -181,7 +181,7 @@ struct eventpoll {
181 181
182 /* 182 /*
183 * This is a single linked list that chains all the "struct epitem" that 183 * This is a single linked list that chains all the "struct epitem" that
184 * happened while transfering ready events to userspace w/out 184 * happened while transferring ready events to userspace w/out
185 * holding ->lock. 185 * holding ->lock.
186 */ 186 */
187 struct epitem *ovflist; 187 struct epitem *ovflist;
@@ -316,6 +316,19 @@ static void ep_nested_calls_init(struct nested_calls *ncalls)
316} 316}
317 317
318/** 318/**
319 * ep_events_available - Checks if ready events might be available.
320 *
321 * @ep: Pointer to the eventpoll context.
322 *
323 * Returns: A non-zero value if ready events are available,
324 * or zero otherwise.
325 */
326static inline int ep_events_available(struct eventpoll *ep)
327{
328 return !list_empty(&ep->rdllist) || ep->ovflist != EP_UNACTIVE_PTR;
329}
330
331/**
319 * ep_call_nested - Perform a bound (possibly) nested call, by checking 332 * ep_call_nested - Perform a bound (possibly) nested call, by checking
320 * that the recursion limit is not exceeded, and that 333 * that the recursion limit is not exceeded, and that
321 * the same nested call (by the meaning of same cookie) is 334 * the same nested call (by the meaning of same cookie) is
@@ -593,7 +606,7 @@ static void ep_free(struct eventpoll *ep)
593 * We do not need to hold "ep->mtx" here because the epoll file 606 * We do not need to hold "ep->mtx" here because the epoll file
594 * is on the way to be removed and no one has references to it 607 * is on the way to be removed and no one has references to it
595 * anymore. The only hit might come from eventpoll_release_file() but 608 * anymore. The only hit might come from eventpoll_release_file() but
596 * holding "epmutex" is sufficent here. 609 * holding "epmutex" is sufficient here.
597 */ 610 */
598 mutex_lock(&epmutex); 611 mutex_lock(&epmutex);
599 612
@@ -707,7 +720,7 @@ void eventpoll_release_file(struct file *file)
707 /* 720 /*
708 * We don't want to get "file->f_lock" because it is not 721 * We don't want to get "file->f_lock" because it is not
709 * necessary. It is not necessary because we're in the "struct file" 722 * necessary. It is not necessary because we're in the "struct file"
710 * cleanup path, and this means that noone is using this file anymore. 723 * cleanup path, and this means that no one is using this file anymore.
711 * So, for example, epoll_ctl() cannot hit here since if we reach this 724 * So, for example, epoll_ctl() cannot hit here since if we reach this
712 * point, the file counter already went to zero and fget() would fail. 725 * point, the file counter already went to zero and fget() would fail.
713 * The only hit might come from ep_free() but by holding the mutex 726 * The only hit might come from ep_free() but by holding the mutex
@@ -1099,7 +1112,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
1099 * Trigger mode, we need to insert back inside 1112 * Trigger mode, we need to insert back inside
1100 * the ready list, so that the next call to 1113 * the ready list, so that the next call to
1101 * epoll_wait() will check again the events 1114 * epoll_wait() will check again the events
1102 * availability. At this point, noone can insert 1115 * availability. At this point, no one can insert
1103 * into ep->rdllist besides us. The epoll_ctl() 1116 * into ep->rdllist besides us. The epoll_ctl()
1104 * callers are locked out by 1117 * callers are locked out by
1105 * ep_scan_ready_list() holding "mtx" and the 1118 * ep_scan_ready_list() holding "mtx" and the
@@ -1135,12 +1148,29 @@ static inline struct timespec ep_set_mstimeout(long ms)
1135 return timespec_add_safe(now, ts); 1148 return timespec_add_safe(now, ts);
1136} 1149}
1137 1150
1151/**
1152 * ep_poll - Retrieves ready events, and delivers them to the caller supplied
1153 * event buffer.
1154 *
1155 * @ep: Pointer to the eventpoll context.
1156 * @events: Pointer to the userspace buffer where the ready events should be
1157 * stored.
1158 * @maxevents: Size (in terms of number of events) of the caller event buffer.
1159 * @timeout: Maximum timeout for the ready events fetch operation, in
1160 * milliseconds. If the @timeout is zero, the function will not block,
1161 * whereas if the @timeout is less than zero, the function will block
1162 * until at least one event has been retrieved (or an error
1163 * occurred).
1164 *
1165 * Returns: The number of ready events fetched, or an error
1166 * code on failure.
1167 */
1138static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, 1168static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
1139 int maxevents, long timeout) 1169 int maxevents, long timeout)
1140{ 1170{
1141 int res, eavail, timed_out = 0; 1171 int res = 0, eavail, timed_out = 0;
1142 unsigned long flags; 1172 unsigned long flags;
1143 long slack; 1173 long slack = 0;
1144 wait_queue_t wait; 1174 wait_queue_t wait;
1145 ktime_t expires, *to = NULL; 1175 ktime_t expires, *to = NULL;
1146 1176
@@ -1151,14 +1181,19 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
1151 to = &expires; 1181 to = &expires;
1152 *to = timespec_to_ktime(end_time); 1182 *to = timespec_to_ktime(end_time);
1153 } else if (timeout == 0) { 1183 } else if (timeout == 0) {
1184 /*
1185 * Avoid the unnecessary trip to the wait queue loop if the
1186 * caller specified a non-blocking operation.
1187 */
1154 timed_out = 1; 1188 timed_out = 1;
1189 spin_lock_irqsave(&ep->lock, flags);
1190 goto check_events;
1155 } 1191 }
1156 1192
1157retry: 1193fetch_events:
1158 spin_lock_irqsave(&ep->lock, flags); 1194 spin_lock_irqsave(&ep->lock, flags);
1159 1195
1160 res = 0; 1196 if (!ep_events_available(ep)) {
1161 if (list_empty(&ep->rdllist)) {
1162 /* 1197 /*
1163 * We don't have any available event to return to the caller. 1198 * We don't have any available event to return to the caller.
1164 * We need to sleep here, and we will be woken up by 1199 * We need to sleep here, and we will be woken up by
@@ -1174,7 +1209,7 @@ retry:
1174 * to TASK_INTERRUPTIBLE before doing the checks. 1209 * to TASK_INTERRUPTIBLE before doing the checks.
1175 */ 1210 */
1176 set_current_state(TASK_INTERRUPTIBLE); 1211 set_current_state(TASK_INTERRUPTIBLE);
1177 if (!list_empty(&ep->rdllist) || timed_out) 1212 if (ep_events_available(ep) || timed_out)
1178 break; 1213 break;
1179 if (signal_pending(current)) { 1214 if (signal_pending(current)) {
1180 res = -EINTR; 1215 res = -EINTR;
@@ -1191,8 +1226,9 @@ retry:
1191 1226
1192 set_current_state(TASK_RUNNING); 1227 set_current_state(TASK_RUNNING);
1193 } 1228 }
1229check_events:
1194 /* Is it worth trying to dig for events? */ 1230 /* Is it worth trying to dig for events? */
1195 eavail = !list_empty(&ep->rdllist) || ep->ovflist != EP_UNACTIVE_PTR; 1231 eavail = ep_events_available(ep);
1196 1232
1197 spin_unlock_irqrestore(&ep->lock, flags); 1233 spin_unlock_irqrestore(&ep->lock, flags);
1198 1234
@@ -1203,7 +1239,7 @@ retry:
1203 */ 1239 */
1204 if (!res && eavail && 1240 if (!res && eavail &&
1205 !(res = ep_send_events(ep, events, maxevents)) && !timed_out) 1241 !(res = ep_send_events(ep, events, maxevents)) && !timed_out)
1206 goto retry; 1242 goto fetch_events;
1207 1243
1208 return res; 1244 return res;
1209} 1245}
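
The reworked ep_poll() above pins down the timeout semantics userspace sees through epoll_wait(): a zero timeout never blocks, a negative timeout blocks until an event, a signal, or an error. A small, runnable userspace sketch exercising both modes (watches stdin; illustrative only):

#include <stdio.h>
#include <unistd.h>
#include <sys/epoll.h>

int main(void)
{
        struct epoll_event ev = { .events = EPOLLIN };
        struct epoll_event ready[8];
        int epfd, n;

        epfd = epoll_create1(0);
        if (epfd < 0)
                return 1;
        ev.data.fd = STDIN_FILENO;
        epoll_ctl(epfd, EPOLL_CTL_ADD, STDIN_FILENO, &ev);

        n = epoll_wait(epfd, ready, 8, 0);   /* timeout 0: returns at once */
        printf("immediate check: %d fd(s) ready\n", n);

        n = epoll_wait(epfd, ready, 8, -1);  /* timeout < 0: block for event */
        printf("after blocking: %d fd(s) ready\n", n);

        close(epfd);
        return 0;
}
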
diff --git a/fs/exec.c b/fs/exec.c
index ba99e1abb1aa..5e62d26a4fec 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1875,7 +1875,7 @@ static void wait_for_dump_helpers(struct file *file)
1875 1875
1876 1876
1877/* 1877/*
1878 * uhm_pipe_setup 1878 * umh_pipe_setup
1879 * helper function to customize the process used 1879 * helper function to customize the process used
1880 * to collect the core in userspace. Specifically 1880 * to collect the core in userspace. Specifically
1881 * it sets up a pipe and installs it as fd 0 (stdin) 1881 * it sets up a pipe and installs it as fd 0 (stdin)
diff --git a/fs/exofs/common.h b/fs/exofs/common.h
index f0d520312d8b..3bbd46956d77 100644
--- a/fs/exofs/common.h
+++ b/fs/exofs/common.h
@@ -53,10 +53,14 @@
53#define EXOFS_ROOT_ID 0x10002 /* object ID for root directory */ 53#define EXOFS_ROOT_ID 0x10002 /* object ID for root directory */
54 54
55/* exofs Application specific page/attribute */ 55/* exofs Application specific page/attribute */
56/* Inode attrs */
56# define EXOFS_APAGE_FS_DATA (OSD_APAGE_APP_DEFINED_FIRST + 3) 57# define EXOFS_APAGE_FS_DATA (OSD_APAGE_APP_DEFINED_FIRST + 3)
57# define EXOFS_ATTR_INODE_DATA 1 58# define EXOFS_ATTR_INODE_DATA 1
58# define EXOFS_ATTR_INODE_FILE_LAYOUT 2 59# define EXOFS_ATTR_INODE_FILE_LAYOUT 2
59# define EXOFS_ATTR_INODE_DIR_LAYOUT 3 60# define EXOFS_ATTR_INODE_DIR_LAYOUT 3
61/* Partition attrs */
62# define EXOFS_APAGE_SB_DATA (0xF0000000U + 3)
63# define EXOFS_ATTR_SB_STATS 1
60 64
61/* 65/*
62 * The maximum number of files we can have is limited by the size of the 66 * The maximum number of files we can have is limited by the size of the
@@ -86,8 +90,8 @@ enum {
86 */ 90 */
87enum {EXOFS_FSCB_VER = 1, EXOFS_DT_VER = 1}; 91enum {EXOFS_FSCB_VER = 1, EXOFS_DT_VER = 1};
88struct exofs_fscb { 92struct exofs_fscb {
89 __le64 s_nextid; /* Highest object ID used */ 93 __le64 s_nextid; /* Only used after mkfs */
90 __le64 s_numfiles; /* Number of files on fs */ 94 __le64 s_numfiles; /* Only used after mkfs */
91 __le32 s_version; /* == EXOFS_FSCB_VER */ 95 __le32 s_version; /* == EXOFS_FSCB_VER */
92 __le16 s_magic; /* Magic signature */ 96 __le16 s_magic; /* Magic signature */
93 __le16 s_newfs; /* Non-zero if this is a new fs */ 97 __le16 s_newfs; /* Non-zero if this is a new fs */
@@ -98,10 +102,20 @@ struct exofs_fscb {
98} __packed; 102} __packed;
99 103
100/* 104/*
105 * This struct is set on the FS partition's attributes.
106 * [EXOFS_APAGE_SB_DATA, EXOFS_ATTR_SB_STATS] and is written together
107 * with the create command, to atomically persist the sb writeable information.
108 */
109struct exofs_sb_stats {
110 __le64 s_nextid; /* Highest object ID used */
111 __le64 s_numfiles; /* Number of files on fs */
112} __packed;
113
114/*
101 * Describes the raid used in the FS. It is part of the device table. 115 * Describes the raid used in the FS. It is part of the device table.
102 * This here is taken from the pNFS-objects definition. In exofs we 116 * This here is taken from the pNFS-objects definition. In exofs we
103 * use one raid policy through-out the filesystem. (NOTE: the funny 117 * use one raid policy through-out the filesystem. (NOTE: the funny
104 * alignment at begining. We take care of it at exofs_device_table. 118 * alignment at beginning. We take care of it at exofs_device_table.)
105 */ 119 */
106struct exofs_dt_data_map { 120struct exofs_dt_data_map {
107 __le32 cb_num_comps; 121 __le32 cb_num_comps;
@@ -122,7 +136,7 @@ struct exofs_dt_device_info {
122 u8 systemid[OSD_SYSTEMID_LEN]; 136 u8 systemid[OSD_SYSTEMID_LEN];
123 __le64 long_name_offset; /* If !0 then offset-in-file */ 137 __le64 long_name_offset; /* If !0 then offset-in-file */
124 __le32 osdname_len; /* */ 138 __le32 osdname_len; /* */
125 u8 osdname[44]; /* Embbeded, Ususally an asci uuid */ 139 u8 osdname[44]; /* Embedded, usually an ASCII uuid */
126} __packed; 140} __packed;
127 141
128/* 142/*
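
The exofs_sb_stats structure above deliberately carries only the two mutable counters, so persisting mount state becomes one small attribute write instead of a full fscb rewrite. A hedged sketch of decoding the attribute into the in-memory superblock info, following the declared little-endian field widths (the helper name is illustrative):

static void decode_sb_stats(struct exofs_sb_info *sbi,
                            const struct exofs_sb_stats *ess)
{
        sbi->s_nextid   = le64_to_cpu(ess->s_nextid);   /* highest object ID */
        sbi->s_numfiles = le64_to_cpu(ess->s_numfiles); /* file count */
}
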
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index dcc941d82d67..d0941c6a1f72 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -124,7 +124,7 @@ out:
124 124
125Ebadsize: 125Ebadsize:
126 EXOFS_ERR("ERROR [exofs_check_page]: " 126 EXOFS_ERR("ERROR [exofs_check_page]: "
127 "size of directory #%lu is not a multiple of chunk size", 127 "size of directory(0x%lx) is not a multiple of chunk size\n",
128 dir->i_ino 128 dir->i_ino
129 ); 129 );
130 goto fail; 130 goto fail;
@@ -142,8 +142,8 @@ Espan:
142 goto bad_entry; 142 goto bad_entry;
143bad_entry: 143bad_entry:
144 EXOFS_ERR( 144 EXOFS_ERR(
145 "ERROR [exofs_check_page]: bad entry in directory #%lu: %s - " 145 "ERROR [exofs_check_page]: bad entry in directory(0x%lx): %s - "
146 "offset=%lu, inode=%llu, rec_len=%d, name_len=%d", 146 "offset=%lu, inode=0x%llu, rec_len=%d, name_len=%d\n",
147 dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs, 147 dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs,
148 _LLU(le64_to_cpu(p->inode_no)), 148 _LLU(le64_to_cpu(p->inode_no)),
149 rec_len, p->name_len); 149 rec_len, p->name_len);
@@ -151,8 +151,8 @@ bad_entry:
151Eend: 151Eend:
152 p = (struct exofs_dir_entry *)(kaddr + offs); 152 p = (struct exofs_dir_entry *)(kaddr + offs);
153 EXOFS_ERR("ERROR [exofs_check_page]: " 153 EXOFS_ERR("ERROR [exofs_check_page]: "
154 "entry in directory #%lu spans the page boundary" 154 "entry in directory(0x%lx) spans the page boundary"
155 "offset=%lu, inode=%llu", 155 "offset=%lu, inode=0x%llx\n",
156 dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs, 156 dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs,
157 _LLU(le64_to_cpu(p->inode_no))); 157 _LLU(le64_to_cpu(p->inode_no)));
158fail: 158fail:
@@ -261,9 +261,8 @@ exofs_readdir(struct file *filp, void *dirent, filldir_t filldir)
261 struct page *page = exofs_get_page(inode, n); 261 struct page *page = exofs_get_page(inode, n);
262 262
263 if (IS_ERR(page)) { 263 if (IS_ERR(page)) {
264 EXOFS_ERR("ERROR: " 264 EXOFS_ERR("ERROR: bad page in directory(0x%lx)\n",
265 "bad page in #%lu", 265 inode->i_ino);
266 inode->i_ino);
267 filp->f_pos += PAGE_CACHE_SIZE - offset; 266 filp->f_pos += PAGE_CACHE_SIZE - offset;
268 return PTR_ERR(page); 267 return PTR_ERR(page);
269 } 268 }
@@ -283,7 +282,8 @@ exofs_readdir(struct file *filp, void *dirent, filldir_t filldir)
283 for (; (char *)de <= limit; de = exofs_next_entry(de)) { 282 for (; (char *)de <= limit; de = exofs_next_entry(de)) {
284 if (de->rec_len == 0) { 283 if (de->rec_len == 0) {
285 EXOFS_ERR("ERROR: " 284 EXOFS_ERR("ERROR: "
286 "zero-length directory entry"); 285 "zero-length entry in directory(0x%lx)\n",
286 inode->i_ino);
287 exofs_put_page(page); 287 exofs_put_page(page);
288 return -EIO; 288 return -EIO;
289 } 289 }
@@ -342,9 +342,9 @@ struct exofs_dir_entry *exofs_find_entry(struct inode *dir,
342 kaddr += exofs_last_byte(dir, n) - reclen; 342 kaddr += exofs_last_byte(dir, n) - reclen;
343 while ((char *) de <= kaddr) { 343 while ((char *) de <= kaddr) {
344 if (de->rec_len == 0) { 344 if (de->rec_len == 0) {
345 EXOFS_ERR( 345 EXOFS_ERR("ERROR: zero-length entry in "
346 "ERROR: exofs_find_entry: " 346 "directory(0x%lx)\n",
347 "zero-length directory entry"); 347 dir->i_ino);
348 exofs_put_page(page); 348 exofs_put_page(page);
349 goto out; 349 goto out;
350 } 350 }
@@ -472,7 +472,8 @@ int exofs_add_link(struct dentry *dentry, struct inode *inode)
472 } 472 }
473 if (de->rec_len == 0) { 473 if (de->rec_len == 0) {
474 EXOFS_ERR("ERROR: exofs_add_link: " 474 EXOFS_ERR("ERROR: exofs_add_link: "
475 "zero-length directory entry"); 475 "zero-length entry in directory(0x%lx)\n",
476 inode->i_ino);
476 err = -EIO; 477 err = -EIO;
477 goto out_unlock; 478 goto out_unlock;
478 } 479 }
@@ -491,7 +492,8 @@ int exofs_add_link(struct dentry *dentry, struct inode *inode)
491 exofs_put_page(page); 492 exofs_put_page(page);
492 } 493 }
493 494
494 EXOFS_ERR("exofs_add_link: BAD dentry=%p or inode=%p", dentry, inode); 495 EXOFS_ERR("exofs_add_link: BAD dentry=%p or inode=0x%lx\n",
496 dentry, inode->i_ino);
495 return -EINVAL; 497 return -EINVAL;
496 498
497got_it: 499got_it:
@@ -542,7 +544,8 @@ int exofs_delete_entry(struct exofs_dir_entry *dir, struct page *page)
542 while (de < dir) { 544 while (de < dir) {
543 if (de->rec_len == 0) { 545 if (de->rec_len == 0) {
544 EXOFS_ERR("ERROR: exofs_delete_entry:" 546 EXOFS_ERR("ERROR: exofs_delete_entry:"
545 "zero-length directory entry"); 547 "zero-length entry in directory(0x%lx)\n",
548 inode->i_ino);
546 err = -EIO; 549 err = -EIO;
547 goto out; 550 goto out;
548 } 551 }
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 2dc925fa1010..c965806c2821 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -77,7 +77,7 @@ struct exofs_layout {
77 * our extension to the in-memory superblock 77 * our extension to the in-memory superblock
78 */ 78 */
79struct exofs_sb_info { 79struct exofs_sb_info {
80 struct exofs_fscb s_fscb; /* Written often, pre-allocate*/ 80 struct exofs_sb_stats s_ess; /* Written often, pre-allocate*/
81 int s_timeout; /* timeout for OSD operations */ 81 int s_timeout; /* timeout for OSD operations */
82 uint64_t s_nextid; /* highest object ID used */ 82 uint64_t s_nextid; /* highest object ID used */
83 uint32_t s_numfiles; /* number of files on fs */ 83 uint32_t s_numfiles; /* number of files on fs */
@@ -256,6 +256,8 @@ static inline int exofs_oi_read(struct exofs_i_info *oi,
256} 256}
257 257
258/* inode.c */ 258/* inode.c */
259unsigned exofs_max_io_pages(struct exofs_layout *layout,
260 unsigned expected_pages);
259int exofs_setattr(struct dentry *, struct iattr *); 261int exofs_setattr(struct dentry *, struct iattr *);
260int exofs_write_begin(struct file *file, struct address_space *mapping, 262int exofs_write_begin(struct file *file, struct address_space *mapping,
261 loff_t pos, unsigned len, unsigned flags, 263 loff_t pos, unsigned len, unsigned flags,
@@ -279,7 +281,7 @@ int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *,
279 struct inode *); 281 struct inode *);
280 282
281/* super.c */ 283/* super.c */
282int exofs_sync_fs(struct super_block *sb, int wait); 284int exofs_sbi_write_stats(struct exofs_sb_info *sbi);
283 285
284/********************* 286/*********************
285 * operation vectors * 287 * operation vectors *
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index b905c79b4f0a..45ca323d8363 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -45,22 +45,8 @@ static int exofs_release_file(struct inode *inode, struct file *filp)
45static int exofs_file_fsync(struct file *filp, int datasync) 45static int exofs_file_fsync(struct file *filp, int datasync)
46{ 46{
47 int ret; 47 int ret;
48 struct inode *inode = filp->f_mapping->host;
49 struct super_block *sb;
50
51 if (!(inode->i_state & I_DIRTY))
52 return 0;
53 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
54 return 0;
55
56 ret = sync_inode_metadata(inode, 1);
57
58 /* This is a good place to write the sb */
59 /* TODO: Sechedule an sb-sync on create */
60 sb = inode->i_sb;
61 if (sb->s_dirt)
62 exofs_sync_fs(sb, 1);
63 48
49 ret = sync_inode_metadata(filp->f_mapping->host, 1);
64 return ret; 50 return ret;
65} 51}
66 52
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index a7555238c41a..8472c098445d 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -43,6 +43,17 @@ enum { BIO_MAX_PAGES_KMALLOC =
43 PAGE_SIZE / sizeof(struct page *), 43 PAGE_SIZE / sizeof(struct page *),
44}; 44};
45 45
46unsigned exofs_max_io_pages(struct exofs_layout *layout,
47 unsigned expected_pages)
48{
49 unsigned pages = min_t(unsigned, expected_pages, MAX_PAGES_KMALLOC);
50
51 /* TODO: easily support bio chaining */
52 pages = min_t(unsigned, pages,
53 layout->group_width * BIO_MAX_PAGES_KMALLOC);
54 return pages;
55}
56
46struct page_collect { 57struct page_collect {
47 struct exofs_sb_info *sbi; 58 struct exofs_sb_info *sbi;
48 struct inode *inode; 59 struct inode *inode;
@@ -97,8 +108,7 @@ static void _pcol_reset(struct page_collect *pcol)
97 108
98static int pcol_try_alloc(struct page_collect *pcol) 109static int pcol_try_alloc(struct page_collect *pcol)
99{ 110{
100 unsigned pages = min_t(unsigned, pcol->expected_pages, 111 unsigned pages;
101 MAX_PAGES_KMALLOC);
102 112
103 if (!pcol->ios) { /* First time allocate io_state */ 113 if (!pcol->ios) { /* First time allocate io_state */
104 int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios); 114 int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios);
@@ -108,8 +118,7 @@ static int pcol_try_alloc(struct page_collect *pcol)
108 } 118 }
109 119
110 /* TODO: easily support bio chaining */ 120 /* TODO: easily support bio chaining */
111 pages = min_t(unsigned, pages, 121 pages = exofs_max_io_pages(&pcol->sbi->layout, pcol->expected_pages);
112 pcol->sbi->layout.group_width * BIO_MAX_PAGES_KMALLOC);
113 122
114 for (; pages; pages >>= 1) { 123 for (; pages; pages >>= 1) {
115 pcol->pages = kmalloc(pages * sizeof(struct page *), 124 pcol->pages = kmalloc(pages * sizeof(struct page *),
@@ -350,8 +359,10 @@ static int readpage_strip(void *data, struct page *page)
350 359
351 if (!pcol->read_4_write) 360 if (!pcol->read_4_write)
352 unlock_page(page); 361 unlock_page(page);
353 EXOFS_DBGMSG("readpage_strip(0x%lx, 0x%lx) empty page," 362 EXOFS_DBGMSG("readpage_strip(0x%lx) empty page len=%zx "
354 " splitting\n", inode->i_ino, page->index); 363 "read_4_write=%d index=0x%lx end_index=0x%lx "
364 "splitting\n", inode->i_ino, len,
365 pcol->read_4_write, page->index, end_index);
355 366
356 return read_exec(pcol); 367 return read_exec(pcol);
357 } 368 }
@@ -722,11 +733,28 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
722 733
723 /* read modify write */ 734 /* read modify write */
724 if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) { 735 if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) {
736 loff_t i_size = i_size_read(mapping->host);
737 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
738 size_t rlen;
739
740 if (page->index < end_index)
741 rlen = PAGE_CACHE_SIZE;
742 else if (page->index == end_index)
743 rlen = i_size & ~PAGE_CACHE_MASK;
744 else
745 rlen = 0;
746
747 if (!rlen) {
748 clear_highpage(page);
749 SetPageUptodate(page);
750 goto out;
751 }
752
725 ret = _readpage(page, true); 753 ret = _readpage(page, true);
726 if (ret) { 754 if (ret) {
727 /*SetPageError was done by _readpage. Is it ok?*/ 755 /*SetPageError was done by _readpage. Is it ok?*/
728 unlock_page(page); 756 unlock_page(page);
729 EXOFS_DBGMSG("__readpage_filler failed\n"); 757 EXOFS_DBGMSG("__readpage failed\n");
730 } 758 }
731 } 759 }
732out: 760out:
@@ -795,7 +823,6 @@ const struct address_space_operations exofs_aops = {
795 .direct_IO = NULL, /* TODO: Should be trivial to do */ 823 .direct_IO = NULL, /* TODO: Should be trivial to do */
796 824
797 /* With these NULL has special meaning or default is not exported */ 825 /* With these NULL has special meaning or default is not exported */
798 .sync_page = NULL,
799 .get_xip_mem = NULL, 826 .get_xip_mem = NULL,
800 .migratepage = NULL, 827 .migratepage = NULL,
801 .launder_page = NULL, 828 .launder_page = NULL,
@@ -1030,6 +1057,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
1030 memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data)); 1057 memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data));
1031 } 1058 }
1032 1059
1060 inode->i_mapping->backing_dev_info = sb->s_bdi;
1033 if (S_ISREG(inode->i_mode)) { 1061 if (S_ISREG(inode->i_mode)) {
1034 inode->i_op = &exofs_file_inode_operations; 1062 inode->i_op = &exofs_file_inode_operations;
1035 inode->i_fop = &exofs_file_operations; 1063 inode->i_fop = &exofs_file_operations;
@@ -1073,6 +1101,7 @@ int __exofs_wait_obj_created(struct exofs_i_info *oi)
1073 } 1101 }
1074 return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0; 1102 return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0;
1075} 1103}
1104
1076/* 1105/*
1077 * Callback function from exofs_new_inode(). The important thing is that we 1106 * Callback function from exofs_new_inode(). The important thing is that we
1078 * set the obj_created flag so that other methods know that the object exists on 1107 * set the obj_created flag so that other methods know that the object exists on
@@ -1130,7 +1159,7 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1130 1159
1131 sbi = sb->s_fs_info; 1160 sbi = sb->s_fs_info;
1132 1161
1133 sb->s_dirt = 1; 1162 inode->i_mapping->backing_dev_info = sb->s_bdi;
1134 inode_init_owner(inode, dir, mode); 1163 inode_init_owner(inode, dir, mode);
1135 inode->i_ino = sbi->s_nextid++; 1164 inode->i_ino = sbi->s_nextid++;
1136 inode->i_blkbits = EXOFS_BLKSHIFT; 1165 inode->i_blkbits = EXOFS_BLKSHIFT;
@@ -1141,6 +1170,8 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1141 spin_unlock(&sbi->s_next_gen_lock); 1170 spin_unlock(&sbi->s_next_gen_lock);
1142 insert_inode_hash(inode); 1171 insert_inode_hash(inode);
1143 1172
1173 exofs_sbi_write_stats(sbi); /* Make sure new sbi->s_nextid is on disk */
1174
1144 mark_inode_dirty(inode); 1175 mark_inode_dirty(inode);
1145 1176
1146 ret = exofs_get_io_state(&sbi->layout, &ios); 1177 ret = exofs_get_io_state(&sbi->layout, &ios);
@@ -1271,7 +1302,8 @@ out:
1271 1302
1272int exofs_write_inode(struct inode *inode, struct writeback_control *wbc) 1303int exofs_write_inode(struct inode *inode, struct writeback_control *wbc)
1273{ 1304{
1274 return exofs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL); 1305 /* FIXME: fix fsync and use wbc->sync_mode == WB_SYNC_ALL */
1306 return exofs_update_inode(inode, 1);
1275} 1307}
1276 1308
1277/* 1309/*
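
exofs_max_io_pages() above centralizes the clamp that pcol_try_alloc() used to open-code: an I/O is bounded both by how many page pointers fit in one kmalloc'd array and by one kmalloc'd bio per device across the stripe width. A self-contained model of the arithmetic, with both capacity constants assumed for illustration:

#include <stdio.h>

/* Assumed capacities, for illustration only (the kernel derives these
 * from PAGE_SIZE and the bio/bio_vec struct sizes). */
#define MAX_PAGES_KMALLOC      512u  /* page pointers per kmalloc'd array */
#define BIO_MAX_PAGES_KMALLOC  256u  /* pages per kmalloc'd bio */

static unsigned max_io_pages(unsigned group_width, unsigned expected_pages)
{
        unsigned pages = expected_pages;

        if (pages > MAX_PAGES_KMALLOC)
                pages = MAX_PAGES_KMALLOC;      /* one page-pointer array */
        if (pages > group_width * BIO_MAX_PAGES_KMALLOC)
                pages = group_width * BIO_MAX_PAGES_KMALLOC; /* one bio/device */
        return pages;
}

int main(void)
{
        /* 2-device stripe, caller expects 10000 pages -> clamped to 512 */
        printf("%u\n", max_io_pages(2, 10000));
        return 0;
}
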
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 8c6c4669b381..06065bd37fc3 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -48,6 +48,7 @@
48 * struct to hold what we get from mount options 48 * struct to hold what we get from mount options
49 */ 49 */
50struct exofs_mountopt { 50struct exofs_mountopt {
51 bool is_osdname;
51 const char *dev_name; 52 const char *dev_name;
52 uint64_t pid; 53 uint64_t pid;
53 int timeout; 54 int timeout;
@@ -56,7 +57,7 @@ struct exofs_mountopt {
56/* 57/*
57 * exofs-specific mount-time options. 58 * exofs-specific mount-time options.
58 */ 59 */
59enum { Opt_pid, Opt_to, Opt_mkfs, Opt_format, Opt_err }; 60enum { Opt_name, Opt_pid, Opt_to, Opt_err };
60 61
61/* 62/*
62 * Our mount-time options. These should ideally be 64-bit unsigned, but the 63 * Our mount-time options. These should ideally be 64-bit unsigned, but the
@@ -64,6 +65,7 @@ enum { Opt_pid, Opt_to, Opt_mkfs, Opt_format, Opt_err };
64 * sufficient for most applications now. 65 * sufficient for most applications now.
65 */ 66 */
66static match_table_t tokens = { 67static match_table_t tokens = {
68 {Opt_name, "osdname=%s"},
67 {Opt_pid, "pid=%u"}, 69 {Opt_pid, "pid=%u"},
68 {Opt_to, "to=%u"}, 70 {Opt_to, "to=%u"},
69 {Opt_err, NULL} 71 {Opt_err, NULL}
@@ -94,6 +96,14 @@ static int parse_options(char *options, struct exofs_mountopt *opts)
94 96
95 token = match_token(p, tokens, args); 97 token = match_token(p, tokens, args);
96 switch (token) { 98 switch (token) {
99 case Opt_name:
100 opts->dev_name = match_strdup(&args[0]);
101 if (unlikely(!opts->dev_name)) {
102 EXOFS_ERR("Error allocating dev_name");
103 return -ENOMEM;
104 }
105 opts->is_osdname = true;
106 break;
97 case Opt_pid: 107 case Opt_pid:
98 if (0 == match_strlcpy(str, &args[0], sizeof(str))) 108 if (0 == match_strlcpy(str, &args[0], sizeof(str)))
99 return -EINVAL; 109 return -EINVAL;
@@ -203,6 +213,101 @@ static void destroy_inodecache(void)
203static const struct super_operations exofs_sops; 213static const struct super_operations exofs_sops;
204static const struct export_operations exofs_export_ops; 214static const struct export_operations exofs_export_ops;
205 215
216static const struct osd_attr g_attr_sb_stats = ATTR_DEF(
217 EXOFS_APAGE_SB_DATA,
218 EXOFS_ATTR_SB_STATS,
219 sizeof(struct exofs_sb_stats));
220
221static int __sbi_read_stats(struct exofs_sb_info *sbi)
222{
223 struct osd_attr attrs[] = {
224 [0] = g_attr_sb_stats,
225 };
226 struct exofs_io_state *ios;
227 int ret;
228
229 ret = exofs_get_io_state(&sbi->layout, &ios);
230 if (unlikely(ret)) {
231 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
232 return ret;
233 }
234
235 ios->cred = sbi->s_cred;
236
237 ios->in_attr = attrs;
238 ios->in_attr_len = ARRAY_SIZE(attrs);
239
240 ret = exofs_sbi_read(ios);
241 if (unlikely(ret)) {
242 EXOFS_ERR("Error reading super_block stats => %d\n", ret);
243 goto out;
244 }
245
246 ret = extract_attr_from_ios(ios, &attrs[0]);
247 if (ret) {
248 EXOFS_ERR("%s: extract_attr of sb_stats failed\n", __func__);
249 goto out;
250 }
251 if (attrs[0].len) {
252 struct exofs_sb_stats *ess;
253
254 if (unlikely(attrs[0].len != sizeof(*ess))) {
255 EXOFS_ERR("%s: Wrong version of exofs_sb_stats "
256 "size(%d) != expected(%zd)\n",
257 __func__, attrs[0].len, sizeof(*ess));
258 goto out;
259 }
260
261 ess = attrs[0].val_ptr;
262 sbi->s_nextid = le64_to_cpu(ess->s_nextid);
263 sbi->s_numfiles = le32_to_cpu(ess->s_numfiles);
264 }
265
266out:
267 exofs_put_io_state(ios);
268 return ret;
269}
270
271static void stats_done(struct exofs_io_state *ios, void *p)
272{
273 exofs_put_io_state(ios);
274 /* Nothing more to do here; the io_state has been released */
275}
276
277/* Asynchronously write the stats attribute */
278int exofs_sbi_write_stats(struct exofs_sb_info *sbi)
279{
280 struct osd_attr attrs[] = {
281 [0] = g_attr_sb_stats,
282 };
283 struct exofs_io_state *ios;
284 int ret;
285
286 ret = exofs_get_io_state(&sbi->layout, &ios);
287 if (unlikely(ret)) {
288 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
289 return ret;
290 }
291
292 sbi->s_ess.s_nextid = cpu_to_le64(sbi->s_nextid);
293 sbi->s_ess.s_numfiles = cpu_to_le64(sbi->s_numfiles);
294 attrs[0].val_ptr = &sbi->s_ess;
295
296 ios->cred = sbi->s_cred;
297 ios->done = stats_done;
298 ios->private = sbi;
299 ios->out_attr = attrs;
300 ios->out_attr_len = ARRAY_SIZE(attrs);
301
302 ret = exofs_sbi_write(ios);
303 if (unlikely(ret)) {
304 EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__);
305 exofs_put_io_state(ios);
306 }
307
308 return ret;
309}
310
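Annotation: exofs_sbi_write_stats() is fire-and-forget: it sets ios->done to stats_done() and returns without waiting for the OSD. A minimal model of that completion-callback shape (names illustrative; completion simulated inline rather than queued):

#include <stdio.h>

struct io_state;
typedef void (*io_done_fn)(struct io_state *ios, void *private);

struct io_state {
	io_done_fn done;
	void *private;
};

static void stats_done(struct io_state *ios, void *p)
{
	(void)ios;
	printf("stats write for %s completed\n", (const char *)p);
}

static void submit(struct io_state *ios)
{
	/* a real submission queues I/O; here completion happens inline */
	ios->done(ios, ios->private);
}

int main(void)
{
	struct io_state ios = { .done = stats_done, .private = (void *)"sbi" };

	submit(&ios);
	return 0;
}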
206/* 311/*
207 * Write the superblock to the OSD 312 * Write the superblock to the OSD
208 */ 313 */
@@ -213,18 +318,25 @@ int exofs_sync_fs(struct super_block *sb, int wait)
213 struct exofs_io_state *ios; 318 struct exofs_io_state *ios;
214 int ret = -ENOMEM; 319 int ret = -ENOMEM;
215 320
216 lock_super(sb); 321 fscb = kmalloc(sizeof(*fscb), GFP_KERNEL);
322 if (unlikely(!fscb))
323 return -ENOMEM;
324
217 sbi = sb->s_fs_info; 325 sbi = sb->s_fs_info;
218 fscb = &sbi->s_fscb;
219 326
327 /* NOTE: We no longer dirty the super_block anywhere in exofs. The
328 * reason we write the fscb here on unmount is so we can stay backwards
 329 * compatible with fscb->s_version == 1. (What we are not compatible
 330 * with is a new-version FS that crashed and is then mounted by an old
 331 * version.) Otherwise the exofs_fscb is read-only from mkfs time. All
332 * the writeable info is set in exofs_sbi_write_stats() above.
333 */
220 ret = exofs_get_io_state(&sbi->layout, &ios); 334 ret = exofs_get_io_state(&sbi->layout, &ios);
221 if (ret) 335 if (unlikely(ret))
222 goto out; 336 goto out;
223 337
224 /* Note: We only write the changing part of the fscb. .i.e upto the 338 lock_super(sb);
225 * the fscb->s_dev_table_oid member. There is no read-modify-write 339
226 * here.
227 */
228 ios->length = offsetof(struct exofs_fscb, s_dev_table_oid); 340 ios->length = offsetof(struct exofs_fscb, s_dev_table_oid);
229 memset(fscb, 0, ios->length); 341 memset(fscb, 0, ios->length);
230 fscb->s_nextid = cpu_to_le64(sbi->s_nextid); 342 fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
@@ -239,16 +351,17 @@ int exofs_sync_fs(struct super_block *sb, int wait)
239 ios->cred = sbi->s_cred; 351 ios->cred = sbi->s_cred;
240 352
241 ret = exofs_sbi_write(ios); 353 ret = exofs_sbi_write(ios);
242 if (unlikely(ret)) { 354 if (unlikely(ret))
243 EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); 355 EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__);
244 goto out; 356 else
245 } 357 sb->s_dirt = 0;
246 sb->s_dirt = 0; 358
247 359
360 unlock_super(sb);
248out: 361out:
249 EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret); 362 EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret);
250 exofs_put_io_state(ios); 363 exofs_put_io_state(ios);
251 unlock_super(sb); 364 kfree(fscb);
252 return ret; 365 return ret;
253} 366}
254 367
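Annotation: the exofs_sync_fs() rework moves the fallible kmalloc() in front of lock_super() and shrinks the locked region to the snapshot of the changing fields. A pthread sketch of that allocate-before-lock pattern (user-space analogue, not the kernel locking API):

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t sb_lock = PTHREAD_MUTEX_INITIALIZER;

struct fscb { unsigned long s_nextid; };

static int sync_fs(const unsigned long *s_nextid)
{
	struct fscb *fscb = malloc(sizeof(*fscb));	/* before the lock */

	if (!fscb)
		return -1;	/* -ENOMEM, with no lock held */

	pthread_mutex_lock(&sb_lock);	/* lock_super() analogue */
	fscb->s_nextid = *s_nextid;	/* snapshot only, under the lock */
	pthread_mutex_unlock(&sb_lock);

	/* ... the snapshot would be submitted to storage here ... */
	free(fscb);
	return 0;
}

int main(void)
{
	unsigned long next = 7;

	return sync_fs(&next);
}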
@@ -292,13 +405,14 @@ static void exofs_put_super(struct super_block *sb)
292 int num_pend; 405 int num_pend;
293 struct exofs_sb_info *sbi = sb->s_fs_info; 406 struct exofs_sb_info *sbi = sb->s_fs_info;
294 407
295 if (sb->s_dirt)
296 exofs_write_super(sb);
297
298 /* make sure there are no pending commands */ 408 /* make sure there are no pending commands */
299 for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0; 409 for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0;
300 num_pend = atomic_read(&sbi->s_curr_pending)) { 410 num_pend = atomic_read(&sbi->s_curr_pending)) {
301 wait_queue_head_t wq; 411 wait_queue_head_t wq;
412
413 printk(KERN_NOTICE "%s: !!Pending operations in flight. "
414 "This is a BUG. please report to osd-dev@open-osd.org\n",
415 __func__);
302 init_waitqueue_head(&wq); 416 init_waitqueue_head(&wq);
303 wait_event_timeout(wq, 417 wait_event_timeout(wq,
304 (atomic_read(&sbi->s_curr_pending) == 0), 418 (atomic_read(&sbi->s_curr_pending) == 0),
@@ -390,6 +504,23 @@ static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs,
390 return 0; 504 return 0;
391} 505}
392 506
507static unsigned __ra_pages(struct exofs_layout *layout)
508{
509 const unsigned _MIN_RA = 32; /* min 128K read-ahead */
510 unsigned ra_pages = layout->group_width * layout->stripe_unit /
511 PAGE_SIZE;
512 unsigned max_io_pages = exofs_max_io_pages(layout, ~0);
513
514 ra_pages *= 2; /* two stripes */
515 if (ra_pages < _MIN_RA)
516 ra_pages = roundup(_MIN_RA, ra_pages / 2);
517
518 if (ra_pages > max_io_pages)
519 ra_pages = max_io_pages;
520
521 return ra_pages;
522}
523
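Annotation: __ra_pages() sizes read-ahead to two full stripes of pages, raised to at least 32 pages (128K with 4K pages) and clamped to the layout's maximum I/O size. The same arithmetic rehearsed in user space, with made-up layout numbers and the usual roundup() definition:

#include <stdio.h>

#define PAGE_SIZE 4096u
#define roundup(x, y) ((((x) + (y) - 1) / (y)) * (y))

static unsigned ra_pages(unsigned group_width, unsigned stripe_unit,
			 unsigned max_io_pages)
{
	const unsigned min_ra = 32;
	unsigned ra = group_width * stripe_unit / PAGE_SIZE;

	ra *= 2;				/* two stripes */
	if (ra < min_ra)
		ra = roundup(min_ra, ra / 2);	/* raise to the floor */
	if (ra > max_io_pages)
		ra = max_io_pages;		/* clamp to max I/O size */
	return ra;
}

int main(void)
{
	/* hypothetical layouts: the narrow one hits the 32-page floor */
	printf("%u\n", ra_pages(1, 16384, 512));	/* 32 */
	printf("%u\n", ra_pages(4, 65536, 512));	/* 128: two stripes */
	return 0;
}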
393/* @odi is valid only as long as @fscb_dev is valid */ 524/* @odi is valid only as long as @fscb_dev is valid */
394static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev, 525static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev,
395 struct osd_dev_info *odi) 526 struct osd_dev_info *odi)
@@ -495,7 +626,7 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
495 } 626 }
496 627
497 od = osduld_info_lookup(&odi); 628 od = osduld_info_lookup(&odi);
498 if (unlikely(IS_ERR(od))) { 629 if (IS_ERR(od)) {
499 ret = PTR_ERR(od); 630 ret = PTR_ERR(od);
500 EXOFS_ERR("ERROR: device requested is not found " 631 EXOFS_ERR("ERROR: device requested is not found "
501 "osd_name-%s =>%d\n", odi.osdname, ret); 632 "osd_name-%s =>%d\n", odi.osdname, ret);
@@ -558,9 +689,17 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
558 goto free_bdi; 689 goto free_bdi;
559 690
560 /* use mount options to fill superblock */ 691 /* use mount options to fill superblock */
561 od = osduld_path_lookup(opts->dev_name); 692 if (opts->is_osdname) {
693 struct osd_dev_info odi = {.systemid_len = 0};
694
695 odi.osdname_len = strlen(opts->dev_name);
696 odi.osdname = (u8 *)opts->dev_name;
697 od = osduld_info_lookup(&odi);
698 } else {
699 od = osduld_path_lookup(opts->dev_name);
700 }
562 if (IS_ERR(od)) { 701 if (IS_ERR(od)) {
563 ret = PTR_ERR(od); 702 ret = -EINVAL;
564 goto free_sbi; 703 goto free_sbi;
565 } 704 }
566 705
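Annotation: fill_super now dispatches on is_osdname, resolving the device by OSD name via osduld_info_lookup() instead of treating dev_name as a path. A stubbed user-space model of that dispatch (stand-in lookups, not the osduld API):

#include <stdio.h>

struct osd_dev { const char *how; };

static struct osd_dev *lookup_by_name(const char *name)	/* stand-in */
{
	static struct osd_dev d = { "by-name" };
	return name ? &d : NULL;
}

static struct osd_dev *lookup_by_path(const char *path)	/* stand-in */
{
	static struct osd_dev d = { "by-path" };
	return path ? &d : NULL;
}

static struct osd_dev *resolve(int is_osdname, const char *dev_name)
{
	return is_osdname ? lookup_by_name(dev_name)
			  : lookup_by_path(dev_name);
}

int main(void)
{
	printf("%s\n", resolve(1, "my-osd-0")->how);	/* by-name */
	return 0;
}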
@@ -594,6 +733,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
594 goto free_sbi; 733 goto free_sbi;
595 734
596 sb->s_magic = le16_to_cpu(fscb.s_magic); 735 sb->s_magic = le16_to_cpu(fscb.s_magic);
736 /* NOTE: we read below to be backward compatible with old versions */
597 sbi->s_nextid = le64_to_cpu(fscb.s_nextid); 737 sbi->s_nextid = le64_to_cpu(fscb.s_nextid);
598 sbi->s_numfiles = le32_to_cpu(fscb.s_numfiles); 738 sbi->s_numfiles = le32_to_cpu(fscb.s_numfiles);
599 739
@@ -604,7 +744,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
604 ret = -EINVAL; 744 ret = -EINVAL;
605 goto free_sbi; 745 goto free_sbi;
606 } 746 }
607 if (le32_to_cpu(fscb.s_version) != EXOFS_FSCB_VER) { 747 if (le32_to_cpu(fscb.s_version) > EXOFS_FSCB_VER) {
608 EXOFS_ERR("ERROR: Bad FSCB version expected-%d got-%d\n", 748 EXOFS_ERR("ERROR: Bad FSCB version expected-%d got-%d\n",
609 EXOFS_FSCB_VER, le32_to_cpu(fscb.s_version)); 749 EXOFS_FSCB_VER, le32_to_cpu(fscb.s_version));
610 ret = -EINVAL; 750 ret = -EINVAL;
@@ -622,7 +762,10 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
622 goto free_sbi; 762 goto free_sbi;
623 } 763 }
624 764
765 __sbi_read_stats(sbi);
766
625 /* set up operation vectors */ 767 /* set up operation vectors */
768 sbi->bdi.ra_pages = __ra_pages(&sbi->layout);
626 sb->s_bdi = &sbi->bdi; 769 sb->s_bdi = &sbi->bdi;
627 sb->s_fs_info = sbi; 770 sb->s_fs_info = sbi;
628 sb->s_op = &exofs_sops; 771 sb->s_op = &exofs_sops;
@@ -652,6 +795,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
652 795
653 _exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0], 796 _exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0],
654 sbi->layout.s_pid); 797 sbi->layout.s_pid);
798 if (opts->is_osdname)
799 kfree(opts->dev_name);
655 return 0; 800 return 0;
656 801
657free_sbi: 802free_sbi:
@@ -660,6 +805,8 @@ free_bdi:
660 EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", 805 EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
661 opts->dev_name, sbi->layout.s_pid, ret); 806 opts->dev_name, sbi->layout.s_pid, ret);
662 exofs_free_sbi(sbi); 807 exofs_free_sbi(sbi);
808 if (opts->is_osdname)
809 kfree(opts->dev_name);
663 return ret; 810 return ret;
664} 811}
665 812
@@ -677,7 +824,8 @@ static struct dentry *exofs_mount(struct file_system_type *type,
677 if (ret) 824 if (ret)
678 return ERR_PTR(ret); 825 return ERR_PTR(ret);
679 826
680 opts.dev_name = dev_name; 827 if (!opts.dev_name)
828 opts.dev_name = dev_name;
681 return mount_nodev(type, flags, &opts, exofs_fill_super); 829 return mount_nodev(type, flags, &opts, exofs_fill_super);
682} 830}
683 831
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 7b4180554a62..abea5a17c764 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -406,7 +406,7 @@ ext2_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
406 return -EINVAL; 406 return -EINVAL;
407 if (!test_opt(dentry->d_sb, POSIX_ACL)) 407 if (!test_opt(dentry->d_sb, POSIX_ACL))
408 return -EOPNOTSUPP; 408 return -EOPNOTSUPP;
409 if (!is_owner_or_cap(dentry->d_inode)) 409 if (!inode_owner_or_capable(dentry->d_inode))
410 return -EPERM; 410 return -EPERM;
411 411
412 if (value) { 412 if (value) {
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 0d06f4e75699..8f44cef1b3ef 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -850,7 +850,7 @@ static int find_next_reservable_window(
850 rsv_window_remove(sb, my_rsv); 850 rsv_window_remove(sb, my_rsv);
851 851
852 /* 852 /*
853 * Let's book the whole avaliable window for now. We will check the 853 * Let's book the whole available window for now. We will check the
854 * disk bitmap later and then, if there are free blocks then we adjust 854 * disk bitmap later and then, if there are free blocks then we adjust
855 * the window size if it's larger than requested. 855 * the window size if it's larger than requested.
856 * Otherwise, we will remove this node from the tree next time 856 * Otherwise, we will remove this node from the tree next time
@@ -1357,9 +1357,9 @@ retry_alloc:
1357 goto allocated; 1357 goto allocated;
1358 } 1358 }
1359 /* 1359 /*
1360 * We may end up a bogus ealier ENOSPC error due to 1360 * We may end up a bogus earlier ENOSPC error due to
1361 * filesystem is "full" of reservations, but 1361 * filesystem is "full" of reservations, but
1362 * there maybe indeed free blocks avaliable on disk 1362 * there maybe indeed free blocks available on disk
1363 * In this case, we just forget about the reservations 1363 * In this case, we just forget about the reservations
1364 * just do block allocation as without reservations. 1364 * just do block allocation as without reservations.
1365 */ 1365 */
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 1b48c3370872..645be9e7ee47 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -174,3 +174,9 @@ ext2_group_first_block_no(struct super_block *sb, unsigned long group_no)
174 return group_no * (ext2_fsblk_t)EXT2_BLOCKS_PER_GROUP(sb) + 174 return group_no * (ext2_fsblk_t)EXT2_BLOCKS_PER_GROUP(sb) +
175 le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block); 175 le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block);
176} 176}
177
178#define ext2_set_bit __test_and_set_bit_le
179#define ext2_clear_bit __test_and_clear_bit_le
180#define ext2_test_bit test_bit_le
181#define ext2_find_first_zero_bit find_first_zero_bit_le
182#define ext2_find_next_zero_bit find_next_zero_bit_le
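Annotation: these macros now alias the generic little-endian bitops, whose bit numbering is defined over bytes and is therefore independent of host endianness. A simplified byte-array model of test_and_set_bit_le() (the kernel version operates on unsigned longs and is atomic-variant aware; this is only the addressing semantics):

#include <stdio.h>

static int test_and_set_bit_le(unsigned nr, unsigned char *addr)
{
	unsigned char mask = 1u << (nr & 7);	/* bit within the byte */
	unsigned char old = addr[nr >> 3];	/* byte index = nr / 8 */

	addr[nr >> 3] = old | mask;
	return (old & mask) != 0;		/* previous bit value */
}

int main(void)
{
	unsigned char bitmap[8] = { 0 };

	printf("%d\n", test_and_set_bit_le(10, bitmap));	/* 0: was clear */
	printf("%d\n", test_and_set_bit_le(10, bitmap));	/* 1: now set */
	printf("byte1=0x%02x\n", bitmap[1]);			/* 0x04 */
	return 0;
}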
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 40ad210a5049..788e09a07f7e 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -305,7 +305,7 @@ static ext2_fsblk_t ext2_find_near(struct inode *inode, Indirect *ind)
305 return ind->bh->b_blocknr; 305 return ind->bh->b_blocknr;
306 306
307 /* 307 /*
308 * It is going to be refered from inode itself? OK, just put it into 308 * It is going to be referred from inode itself? OK, just put it into
309 * the same cylinder group then. 309 * the same cylinder group then.
310 */ 310 */
311 bg_start = ext2_group_first_block_no(inode->i_sb, ei->i_block_group); 311 bg_start = ext2_group_first_block_no(inode->i_sb, ei->i_block_group);
@@ -860,7 +860,6 @@ const struct address_space_operations ext2_aops = {
860 .readpage = ext2_readpage, 860 .readpage = ext2_readpage,
861 .readpages = ext2_readpages, 861 .readpages = ext2_readpages,
862 .writepage = ext2_writepage, 862 .writepage = ext2_writepage,
863 .sync_page = block_sync_page,
864 .write_begin = ext2_write_begin, 863 .write_begin = ext2_write_begin,
865 .write_end = ext2_write_end, 864 .write_end = ext2_write_end,
866 .bmap = ext2_bmap, 865 .bmap = ext2_bmap,
@@ -880,7 +879,6 @@ const struct address_space_operations ext2_nobh_aops = {
880 .readpage = ext2_readpage, 879 .readpage = ext2_readpage,
881 .readpages = ext2_readpages, 880 .readpages = ext2_readpages,
882 .writepage = ext2_nobh_writepage, 881 .writepage = ext2_nobh_writepage,
883 .sync_page = block_sync_page,
884 .write_begin = ext2_nobh_write_begin, 882 .write_begin = ext2_nobh_write_begin,
885 .write_end = nobh_write_end, 883 .write_end = nobh_write_end,
886 .bmap = ext2_bmap, 884 .bmap = ext2_bmap,
@@ -915,7 +913,7 @@ static inline int all_zeroes(__le32 *p, __le32 *q)
915 * 913 *
916 * When we do truncate() we may have to clean the ends of several indirect 914 * When we do truncate() we may have to clean the ends of several indirect
917 * blocks but leave the blocks themselves alive. Block is partially 915 * blocks but leave the blocks themselves alive. Block is partially
918 * truncated if some data below the new i_size is refered from it (and 916 * truncated if some data below the new i_size is referred from it (and
919 * it is on the path to the first completely truncated data block, indeed). 917 * it is on the path to the first completely truncated data block, indeed).
920 * We have to free the top of that path along with everything to the right 918 * We have to free the top of that path along with everything to the right
921 * of the path. Since no allocation past the truncation point is possible 919 * of the path. Since no allocation past the truncation point is possible
@@ -992,7 +990,7 @@ no_top:
992 * @p: array of block numbers 990 * @p: array of block numbers
993 * @q: points immediately past the end of array 991 * @q: points immediately past the end of array
994 * 992 *
995 * We are freeing all blocks refered from that array (numbers are 993 * We are freeing all blocks referred from that array (numbers are
996 * stored as little-endian 32-bit) and updating @inode->i_blocks 994 * stored as little-endian 32-bit) and updating @inode->i_blocks
997 * appropriately. 995 * appropriately.
998 */ 996 */
@@ -1032,7 +1030,7 @@ static inline void ext2_free_data(struct inode *inode, __le32 *p, __le32 *q)
1032 * @q: pointer immediately past the end of array 1030 * @q: pointer immediately past the end of array
1033 * @depth: depth of the branches to free 1031 * @depth: depth of the branches to free
1034 * 1032 *
1035 * We are freeing all blocks refered from these branches (numbers are 1033 * We are freeing all blocks referred from these branches (numbers are
1036 * stored as little-endian 32-bit) and updating @inode->i_blocks 1034 * stored as little-endian 32-bit) and updating @inode->i_blocks
1037 * appropriately. 1035 * appropriately.
1038 */ 1036 */
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index e7431309bdca..f81e250ac5c4 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -39,7 +39,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
39 if (ret) 39 if (ret)
40 return ret; 40 return ret;
41 41
42 if (!is_owner_or_cap(inode)) { 42 if (!inode_owner_or_capable(inode)) {
43 ret = -EACCES; 43 ret = -EACCES;
44 goto setflags_out; 44 goto setflags_out;
45 } 45 }
@@ -89,7 +89,7 @@ setflags_out:
89 case EXT2_IOC_GETVERSION: 89 case EXT2_IOC_GETVERSION:
90 return put_user(inode->i_generation, (int __user *) arg); 90 return put_user(inode->i_generation, (int __user *) arg);
91 case EXT2_IOC_SETVERSION: 91 case EXT2_IOC_SETVERSION:
92 if (!is_owner_or_cap(inode)) 92 if (!inode_owner_or_capable(inode))
93 return -EPERM; 93 return -EPERM;
94 ret = mnt_want_write(filp->f_path.mnt); 94 ret = mnt_want_write(filp->f_path.mnt);
95 if (ret) 95 if (ret)
@@ -115,7 +115,7 @@ setflags_out:
115 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) 115 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
116 return -ENOTTY; 116 return -ENOTTY;
117 117
118 if (!is_owner_or_cap(inode)) 118 if (!inode_owner_or_capable(inode))
119 return -EACCES; 119 return -EACCES;
120 120
121 if (get_user(rsv_window_size, (int __user *)arg)) 121 if (get_user(rsv_window_size, (int __user *)arg))
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 7731695e65d9..0a78dae7e2cb 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1382,7 +1382,7 @@ static struct dentry *ext2_mount(struct file_system_type *fs_type,
1382 1382
1383/* Read data from quotafile - avoid pagecache and such because we cannot afford 1383/* Read data from quotafile - avoid pagecache and such because we cannot afford
1384 * acquiring the locks... As quota files are never truncated and quota code 1384 * acquiring the locks... As quota files are never truncated and quota code
1385 * itself serializes the operations (and noone else should touch the files) 1385 * itself serializes the operations (and no one else should touch the files)
1386 * we don't have to be afraid of races */ 1386 * we don't have to be afraid of races */
1387static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, 1387static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data,
1388 size_t len, loff_t off) 1388 size_t len, loff_t off)
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index c2e4dce984d2..529970617a21 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -35,7 +35,7 @@
35 * +------------------+ 35 * +------------------+
36 * 36 *
37 * The block header is followed by multiple entry descriptors. These entry 37 * The block header is followed by multiple entry descriptors. These entry
38 * descriptors are variable in size, and alligned to EXT2_XATTR_PAD 38 * descriptors are variable in size, and aligned to EXT2_XATTR_PAD
39 * byte boundaries. The entry descriptors are sorted by attribute name, 39 * byte boundaries. The entry descriptors are sorted by attribute name,
40 * so that two extended attribute blocks can be compared efficiently. 40 * so that two extended attribute blocks can be compared efficiently.
41 * 41 *
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index e4fa49e6c539..9d021c0d472a 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -435,7 +435,7 @@ ext3_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
435 return -EINVAL; 435 return -EINVAL;
436 if (!test_opt(inode->i_sb, POSIX_ACL)) 436 if (!test_opt(inode->i_sb, POSIX_ACL))
437 return -EOPNOTSUPP; 437 return -EOPNOTSUPP;
438 if (!is_owner_or_cap(inode)) 438 if (!inode_owner_or_capable(inode))
439 return -EPERM; 439 return -EPERM;
440 440
441 if (value) { 441 if (value) {
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 153242187fce..fe52297e31ad 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -590,7 +590,7 @@ do_more:
590 BUFFER_TRACE(debug_bh, "Deleted!"); 590 BUFFER_TRACE(debug_bh, "Deleted!");
591 if (!bh2jh(bitmap_bh)->b_committed_data) 591 if (!bh2jh(bitmap_bh)->b_committed_data)
592 BUFFER_TRACE(debug_bh, 592 BUFFER_TRACE(debug_bh,
593 "No commited data in bitmap"); 593 "No committed data in bitmap");
594 BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap"); 594 BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap");
595 __brelse(debug_bh); 595 __brelse(debug_bh);
596 } 596 }
@@ -1063,7 +1063,7 @@ static int find_next_reservable_window(
1063 rsv_window_remove(sb, my_rsv); 1063 rsv_window_remove(sb, my_rsv);
1064 1064
1065 /* 1065 /*
1066 * Let's book the whole avaliable window for now. We will check the 1066 * Let's book the whole available window for now. We will check the
1067 * disk bitmap later and then, if there are free blocks then we adjust 1067 * disk bitmap later and then, if there are free blocks then we adjust
1068 * the window size if it's larger than requested. 1068 * the window size if it's larger than requested.
1069 * Otherwise, we will remove this node from the tree next time 1069 * Otherwise, we will remove this node from the tree next time
@@ -1456,7 +1456,7 @@ static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
1456 * 1456 *
1457 * ext3_should_retry_alloc() is called when ENOSPC is returned, and if 1457 * ext3_should_retry_alloc() is called when ENOSPC is returned, and if
1458 * it is profitable to retry the operation, this function will wait 1458 * it is profitable to retry the operation, this function will wait
1459 * for the current or commiting transaction to complete, and then 1459 * for the current or committing transaction to complete, and then
1460 * return TRUE. 1460 * return TRUE.
1461 * 1461 *
1462 * if the total number of retries exceed three times, return FALSE. 1462 * if the total number of retries exceed three times, return FALSE.
@@ -1632,9 +1632,9 @@ retry_alloc:
1632 goto allocated; 1632 goto allocated;
1633 } 1633 }
1634 /* 1634 /*
1635 * We may end up a bogus ealier ENOSPC error due to 1635 * We may end up a bogus earlier ENOSPC error due to
1636 * filesystem is "full" of reservations, but 1636 * filesystem is "full" of reservations, but
1637 * there maybe indeed free blocks avaliable on disk 1637 * there maybe indeed free blocks available on disk
1638 * In this case, we just forget about the reservations 1638 * In this case, we just forget about the reservations
1639 * just do block allocation as without reservations. 1639 * just do block allocation as without reservations.
1640 */ 1640 */
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index ae94f6d949f5..68b2e43d7c35 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1894,7 +1894,6 @@ static const struct address_space_operations ext3_ordered_aops = {
1894 .readpage = ext3_readpage, 1894 .readpage = ext3_readpage,
1895 .readpages = ext3_readpages, 1895 .readpages = ext3_readpages,
1896 .writepage = ext3_ordered_writepage, 1896 .writepage = ext3_ordered_writepage,
1897 .sync_page = block_sync_page,
1898 .write_begin = ext3_write_begin, 1897 .write_begin = ext3_write_begin,
1899 .write_end = ext3_ordered_write_end, 1898 .write_end = ext3_ordered_write_end,
1900 .bmap = ext3_bmap, 1899 .bmap = ext3_bmap,
@@ -1910,7 +1909,6 @@ static const struct address_space_operations ext3_writeback_aops = {
1910 .readpage = ext3_readpage, 1909 .readpage = ext3_readpage,
1911 .readpages = ext3_readpages, 1910 .readpages = ext3_readpages,
1912 .writepage = ext3_writeback_writepage, 1911 .writepage = ext3_writeback_writepage,
1913 .sync_page = block_sync_page,
1914 .write_begin = ext3_write_begin, 1912 .write_begin = ext3_write_begin,
1915 .write_end = ext3_writeback_write_end, 1913 .write_end = ext3_writeback_write_end,
1916 .bmap = ext3_bmap, 1914 .bmap = ext3_bmap,
@@ -1926,7 +1924,6 @@ static const struct address_space_operations ext3_journalled_aops = {
1926 .readpage = ext3_readpage, 1924 .readpage = ext3_readpage,
1927 .readpages = ext3_readpages, 1925 .readpages = ext3_readpages,
1928 .writepage = ext3_journalled_writepage, 1926 .writepage = ext3_journalled_writepage,
1929 .sync_page = block_sync_page,
1930 .write_begin = ext3_write_begin, 1927 .write_begin = ext3_write_begin,
1931 .write_end = ext3_journalled_write_end, 1928 .write_end = ext3_journalled_write_end,
1932 .set_page_dirty = ext3_journalled_set_page_dirty, 1929 .set_page_dirty = ext3_journalled_set_page_dirty,
@@ -2058,7 +2055,7 @@ static inline int all_zeroes(__le32 *p, __le32 *q)
2058 * 2055 *
2059 * When we do truncate() we may have to clean the ends of several 2056 * When we do truncate() we may have to clean the ends of several
2060 * indirect blocks but leave the blocks themselves alive. Block is 2057 * indirect blocks but leave the blocks themselves alive. Block is
2061 * partially truncated if some data below the new i_size is refered 2058 * partially truncated if some data below the new i_size is referred
2062 * from it (and it is on the path to the first completely truncated 2059 * from it (and it is on the path to the first completely truncated
2063 * data block, indeed). We have to free the top of that path along 2060 * data block, indeed). We have to free the top of that path along
2064 * with everything to the right of the path. Since no allocation 2061 * with everything to the right of the path. Since no allocation
@@ -2187,7 +2184,7 @@ static void ext3_clear_blocks(handle_t *handle, struct inode *inode,
2187 * @first: array of block numbers 2184 * @first: array of block numbers
2188 * @last: points immediately past the end of array 2185 * @last: points immediately past the end of array
2189 * 2186 *
2190 * We are freeing all blocks refered from that array (numbers are stored as 2187 * We are freeing all blocks referred from that array (numbers are stored as
2191 * little-endian 32-bit) and updating @inode->i_blocks appropriately. 2188 * little-endian 32-bit) and updating @inode->i_blocks appropriately.
2192 * 2189 *
2193 * We accumulate contiguous runs of blocks to free. Conveniently, if these 2190 * We accumulate contiguous runs of blocks to free. Conveniently, if these
@@ -2275,7 +2272,7 @@ static void ext3_free_data(handle_t *handle, struct inode *inode,
2275 * @last: pointer immediately past the end of array 2272 * @last: pointer immediately past the end of array
2276 * @depth: depth of the branches to free 2273 * @depth: depth of the branches to free
2277 * 2274 *
2278 * We are freeing all blocks refered from these branches (numbers are 2275 * We are freeing all blocks referred from these branches (numbers are
2279 * stored as little-endian 32-bit) and updating @inode->i_blocks 2276 * stored as little-endian 32-bit) and updating @inode->i_blocks
2280 * appropriately. 2277 * appropriately.
2281 */ 2278 */
@@ -3294,7 +3291,7 @@ static int ext3_writepage_trans_blocks(struct inode *inode)
3294 if (ext3_should_journal_data(inode)) 3291 if (ext3_should_journal_data(inode))
3295 ret = 3 * (bpp + indirects) + 2; 3292 ret = 3 * (bpp + indirects) + 2;
3296 else 3293 else
3297 ret = 2 * (bpp + indirects) + 2; 3294 ret = 2 * (bpp + indirects) + indirects + 2;
3298 3295
3299#ifdef CONFIG_QUOTA 3296#ifdef CONFIG_QUOTA
3300 /* We know that structure was already allocated during dquot_initialize so 3297 /* We know that structure was already allocated during dquot_initialize so
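Annotation: the ext3_writepage_trans_blocks() change above reserves an extra `indirects` credits on top of the old 2*(bpp+indirects)+2, presumably for one more pass over the indirect-chain blocks; the hunk itself does not state the motivation. A quick rehearsal of the before/after totals with made-up inputs:

#include <stdio.h>

int main(void)
{
	int bpp = 4, indirects = 3;	/* hypothetical values */
	int old_credits = 2 * (bpp + indirects) + 2;
	int new_credits = 2 * (bpp + indirects) + indirects + 2;

	printf("old=%d new=%d\n", old_credits, new_credits);	/* 16 vs 19 */
	return 0;
}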
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index fc080dd561f7..f4090bd2f345 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -38,7 +38,7 @@ long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
38 unsigned int oldflags; 38 unsigned int oldflags;
39 unsigned int jflag; 39 unsigned int jflag;
40 40
41 if (!is_owner_or_cap(inode)) 41 if (!inode_owner_or_capable(inode))
42 return -EACCES; 42 return -EACCES;
43 43
44 if (get_user(flags, (int __user *) arg)) 44 if (get_user(flags, (int __user *) arg))
@@ -123,7 +123,7 @@ flags_out:
123 __u32 generation; 123 __u32 generation;
124 int err; 124 int err;
125 125
126 if (!is_owner_or_cap(inode)) 126 if (!inode_owner_or_capable(inode))
127 return -EPERM; 127 return -EPERM;
128 128
129 err = mnt_want_write(filp->f_path.mnt); 129 err = mnt_want_write(filp->f_path.mnt);
@@ -192,7 +192,7 @@ setversion_out:
192 if (err) 192 if (err)
193 return err; 193 return err;
194 194
195 if (!is_owner_or_cap(inode)) { 195 if (!inode_owner_or_capable(inode)) {
196 err = -EACCES; 196 err = -EACCES;
197 goto setrsvsz_out; 197 goto setrsvsz_out;
198 } 198 }
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 108b142e11ed..7916e4ce166a 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -1009,7 +1009,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
1009 1009
1010 if (test_opt(sb, DEBUG)) 1010 if (test_opt(sb, DEBUG))
1011 printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK 1011 printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK
1012 " upto "E3FSBLK" blocks\n", 1012 " up to "E3FSBLK" blocks\n",
1013 o_blocks_count, n_blocks_count); 1013 o_blocks_count, n_blocks_count);
1014 1014
1015 if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) 1015 if (n_blocks_count == 0 || n_blocks_count == o_blocks_count)
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 071689f86e18..3c6a9e0eadc1 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2925,7 +2925,7 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2925 2925
2926/* Read data from quotafile - avoid pagecache and such because we cannot afford 2926/* Read data from quotafile - avoid pagecache and such because we cannot afford
2927 * acquiring the locks... As quota files are never truncated and quota code 2927 * acquiring the locks... As quota files are never truncated and quota code
2928 * itself serializes the operations (and noone else should touch the files) 2928 * itself serializes the operations (and no one else should touch the files)
2929 * we don't have to be afraid of races */ 2929 * we don't have to be afraid of races */
2930static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, 2930static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
2931 size_t len, loff_t off) 2931 size_t len, loff_t off)
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index e0270d1f8d82..21eacd7b7d79 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -433,7 +433,7 @@ ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
433 return -EINVAL; 433 return -EINVAL;
434 if (!test_opt(inode->i_sb, POSIX_ACL)) 434 if (!test_opt(inode->i_sb, POSIX_ACL))
435 return -EOPNOTSUPP; 435 return -EOPNOTSUPP;
436 if (!is_owner_or_cap(inode)) 436 if (!inode_owner_or_capable(inode))
437 return -EPERM; 437 return -EPERM;
438 438
439 if (value) { 439 if (value) {
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index adf96b822781..1c67139ad4b4 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -21,6 +21,8 @@
21#include "ext4_jbd2.h" 21#include "ext4_jbd2.h"
22#include "mballoc.h" 22#include "mballoc.h"
23 23
24#include <trace/events/ext4.h>
25
24/* 26/*
25 * balloc.c contains the blocks allocation and deallocation routines 27 * balloc.c contains the blocks allocation and deallocation routines
26 */ 28 */
@@ -342,6 +344,7 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
342 * We do it here so the bitmap uptodate bit 344 * We do it here so the bitmap uptodate bit
343 * get set with buffer lock held. 345 * get set with buffer lock held.
344 */ 346 */
347 trace_ext4_read_block_bitmap_load(sb, block_group);
345 set_bitmap_uptodate(bh); 348 set_bitmap_uptodate(bh);
346 if (bh_submit_read(bh) < 0) { 349 if (bh_submit_read(bh) < 0) {
347 put_bh(bh); 350 put_bh(bh);
@@ -544,7 +547,7 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
544 * 547 *
545 * ext4_should_retry_alloc() is called when ENOSPC is returned, and if 548 * ext4_should_retry_alloc() is called when ENOSPC is returned, and if
546 * it is profitable to retry the operation, this function will wait 549 * it is profitable to retry the operation, this function will wait
547 * for the current or commiting transaction to complete, and then 550 * for the current or committing transaction to complete, and then
548 * return TRUE. 551 * return TRUE.
549 * 552 *
550 * if the total number of retries exceed three times, return FALSE. 553 * if the total number of retries exceed three times, return FALSE.
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 3aa0b72b3b94..4daaf2b753f4 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -923,14 +923,14 @@ struct ext4_inode_info {
923#define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \ 923#define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \
924 EXT4_MOUNT2_##opt) 924 EXT4_MOUNT2_##opt)
925 925
926#define ext4_set_bit ext2_set_bit 926#define ext4_set_bit __test_and_set_bit_le
927#define ext4_set_bit_atomic ext2_set_bit_atomic 927#define ext4_set_bit_atomic ext2_set_bit_atomic
928#define ext4_clear_bit ext2_clear_bit 928#define ext4_clear_bit __test_and_clear_bit_le
929#define ext4_clear_bit_atomic ext2_clear_bit_atomic 929#define ext4_clear_bit_atomic ext2_clear_bit_atomic
930#define ext4_test_bit ext2_test_bit 930#define ext4_test_bit test_bit_le
931#define ext4_find_first_zero_bit ext2_find_first_zero_bit 931#define ext4_find_first_zero_bit find_first_zero_bit_le
932#define ext4_find_next_zero_bit ext2_find_next_zero_bit 932#define ext4_find_next_zero_bit find_next_zero_bit_le
933#define ext4_find_next_bit ext2_find_next_bit 933#define ext4_find_next_bit find_next_bit_le
934 934
935/* 935/*
936 * Maximal mount counts between two filesystem checks 936 * Maximal mount counts between two filesystem checks
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index d8b992e658c1..d0f53538a57f 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -86,8 +86,8 @@
86 86
87#ifdef CONFIG_QUOTA 87#ifdef CONFIG_QUOTA
88/* Amount of blocks needed for quota update - we know that the structure was 88/* Amount of blocks needed for quota update - we know that the structure was
89 * allocated so we need to update only inode+data */ 89 * allocated so we need to update only data block */
90#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0) 90#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 1 : 0)
91/* Amount of blocks needed for quota insert/delete - we do some block writes 91/* Amount of blocks needed for quota insert/delete - we do some block writes
92 * but inode, sb and group updates are done only once */ 92 * but inode, sb and group updates are done only once */
93#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ 93#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
@@ -202,13 +202,6 @@ static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed)
202 return 1; 202 return 1;
203} 203}
204 204
205static inline void ext4_journal_release_buffer(handle_t *handle,
206 struct buffer_head *bh)
207{
208 if (ext4_handle_valid(handle))
209 jbd2_journal_release_buffer(handle, bh);
210}
211
212static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks) 205static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
213{ 206{
214 return ext4_journal_start_sb(inode->i_sb, nblocks); 207 return ext4_journal_start_sb(inode->i_sb, nblocks);
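Annotation: the EXT4_QUOTA_TRANS_BLOCKS change encodes the comment above it: with the dquot structure already allocated, a transactional quota update redirties only the quota data block, so the per-type credit drops from 2 to 1. A toy rendering of that accounting (values illustrative; the real macros live in ext4_jbd2.h):

#include <stdio.h>

static int quota_trans_blocks(int quota_on, int types)
{
	return quota_on ? 1 * types : 0;	/* was 2 * types: inode+data */
}

int main(void)
{
	printf("trans credits, usr+grp: %d\n", quota_trans_blocks(1, 2));
	return 0;
}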
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 7516fb9c0bd5..4890d6f3ad15 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -44,6 +44,8 @@
44#include "ext4_jbd2.h" 44#include "ext4_jbd2.h"
45#include "ext4_extents.h" 45#include "ext4_extents.h"
46 46
47#include <trace/events/ext4.h>
48
47static int ext4_ext_truncate_extend_restart(handle_t *handle, 49static int ext4_ext_truncate_extend_restart(handle_t *handle,
48 struct inode *inode, 50 struct inode *inode,
49 int needed) 51 int needed)
@@ -664,6 +666,8 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
664 if (unlikely(!bh)) 666 if (unlikely(!bh))
665 goto err; 667 goto err;
666 if (!bh_uptodate_or_lock(bh)) { 668 if (!bh_uptodate_or_lock(bh)) {
669 trace_ext4_ext_load_extent(inode, block,
670 path[ppos].p_block);
667 if (bh_submit_read(bh) < 0) { 671 if (bh_submit_read(bh) < 0) {
668 put_bh(bh); 672 put_bh(bh);
669 goto err; 673 goto err;
@@ -1034,7 +1038,7 @@ cleanup:
1034 for (i = 0; i < depth; i++) { 1038 for (i = 0; i < depth; i++) {
1035 if (!ablocks[i]) 1039 if (!ablocks[i])
1036 continue; 1040 continue;
1037 ext4_free_blocks(handle, inode, 0, ablocks[i], 1, 1041 ext4_free_blocks(handle, inode, NULL, ablocks[i], 1,
1038 EXT4_FREE_BLOCKS_METADATA); 1042 EXT4_FREE_BLOCKS_METADATA);
1039 } 1043 }
1040 } 1044 }
@@ -1725,7 +1729,7 @@ repeat:
1725 BUG_ON(npath->p_depth != path->p_depth); 1729 BUG_ON(npath->p_depth != path->p_depth);
1726 eh = npath[depth].p_hdr; 1730 eh = npath[depth].p_hdr;
1727 if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) { 1731 if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) {
1728 ext_debug("next leaf isnt full(%d)\n", 1732 ext_debug("next leaf isn't full(%d)\n",
1729 le16_to_cpu(eh->eh_entries)); 1733 le16_to_cpu(eh->eh_entries));
1730 path = npath; 1734 path = npath;
1731 goto repeat; 1735 goto repeat;
@@ -2059,7 +2063,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
2059 if (err) 2063 if (err)
2060 return err; 2064 return err;
2061 ext_debug("index is empty, remove it, free block %llu\n", leaf); 2065 ext_debug("index is empty, remove it, free block %llu\n", leaf);
2062 ext4_free_blocks(handle, inode, 0, leaf, 1, 2066 ext4_free_blocks(handle, inode, NULL, leaf, 1,
2063 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); 2067 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
2064 return err; 2068 return err;
2065} 2069}
@@ -2156,7 +2160,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2156 num = le32_to_cpu(ex->ee_block) + ee_len - from; 2160 num = le32_to_cpu(ex->ee_block) + ee_len - from;
2157 start = ext4_ext_pblock(ex) + ee_len - num; 2161 start = ext4_ext_pblock(ex) + ee_len - num;
2158 ext_debug("free last %u blocks starting %llu\n", num, start); 2162 ext_debug("free last %u blocks starting %llu\n", num, start);
2159 ext4_free_blocks(handle, inode, 0, start, num, flags); 2163 ext4_free_blocks(handle, inode, NULL, start, num, flags);
2160 } else if (from == le32_to_cpu(ex->ee_block) 2164 } else if (from == le32_to_cpu(ex->ee_block)
2161 && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { 2165 && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
2162 printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n", 2166 printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
@@ -2529,7 +2533,7 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
2529/* 2533/*
2530 * This function is called by ext4_ext_map_blocks() if someone tries to write 2534 * This function is called by ext4_ext_map_blocks() if someone tries to write
2531 * to an uninitialized extent. It may result in splitting the uninitialized 2535 * to an uninitialized extent. It may result in splitting the uninitialized
2532 * extent into multiple extents (upto three - one initialized and two 2536 * extent into multiple extents (up to three - one initialized and two
2533 * uninitialized). 2537 * uninitialized).
2534 * There are three possibilities: 2538 * There are three possibilities:
2535 * a> There is no split required: Entire extent should be initialized 2539 * a> There is no split required: Entire extent should be initialized
@@ -3108,14 +3112,13 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
3108{ 3112{
3109 int i, depth; 3113 int i, depth;
3110 struct ext4_extent_header *eh; 3114 struct ext4_extent_header *eh;
3111 struct ext4_extent *ex, *last_ex; 3115 struct ext4_extent *last_ex;
3112 3116
3113 if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)) 3117 if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
3114 return 0; 3118 return 0;
3115 3119
3116 depth = ext_depth(inode); 3120 depth = ext_depth(inode);
3117 eh = path[depth].p_hdr; 3121 eh = path[depth].p_hdr;
3118 ex = path[depth].p_ext;
3119 3122
3120 if (unlikely(!eh->eh_entries)) { 3123 if (unlikely(!eh->eh_entries)) {
3121 EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and " 3124 EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and "
@@ -3171,7 +3174,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3171 path, flags); 3174 path, flags);
3172 /* 3175 /*
3173 * Flag the inode(non aio case) or end_io struct (aio case) 3176 * Flag the inode(non aio case) or end_io struct (aio case)
 3174 * that this IO needs to convertion to written when IO is 3177 * that this IO needs conversion to written when IO is
3175 * completed 3178 * completed
3176 */ 3179 */
3177 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { 3180 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
@@ -3295,9 +3298,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3295 struct ext4_map_blocks *map, int flags) 3298 struct ext4_map_blocks *map, int flags)
3296{ 3299{
3297 struct ext4_ext_path *path = NULL; 3300 struct ext4_ext_path *path = NULL;
3298 struct ext4_extent_header *eh;
3299 struct ext4_extent newex, *ex; 3301 struct ext4_extent newex, *ex;
3300 ext4_fsblk_t newblock; 3302 ext4_fsblk_t newblock = 0;
3301 int err = 0, depth, ret; 3303 int err = 0, depth, ret;
3302 unsigned int allocated = 0; 3304 unsigned int allocated = 0;
3303 struct ext4_allocation_request ar; 3305 struct ext4_allocation_request ar;
@@ -3305,6 +3307,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3305 3307
3306 ext_debug("blocks %u/%u requested for inode %lu\n", 3308 ext_debug("blocks %u/%u requested for inode %lu\n",
3307 map->m_lblk, map->m_len, inode->i_ino); 3309 map->m_lblk, map->m_len, inode->i_ino);
3310 trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
3308 3311
3309 /* check in cache */ 3312 /* check in cache */
3310 if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) { 3313 if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
@@ -3352,7 +3355,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3352 err = -EIO; 3355 err = -EIO;
3353 goto out2; 3356 goto out2;
3354 } 3357 }
3355 eh = path[depth].p_hdr;
3356 3358
3357 ex = path[depth].p_ext; 3359 ex = path[depth].p_ext;
3358 if (ex) { 3360 if (ex) {
@@ -3458,10 +3460,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3458 ext4_ext_mark_uninitialized(&newex); 3460 ext4_ext_mark_uninitialized(&newex);
3459 /* 3461 /*
3460 * io_end structure was created for every IO write to an 3462 * io_end structure was created for every IO write to an
3461 * uninitialized extent. To avoid unecessary conversion, 3463 * uninitialized extent. To avoid unnecessary conversion,
3462 * here we flag the IO that really needs the conversion. 3464 * here we flag the IO that really needs the conversion.
 3463 * For the non-async direct IO case, flag the inode state 3465 * For the non-async direct IO case, flag the inode state
3464 * that we need to perform convertion when IO is done. 3466 * that we need to perform conversion when IO is done.
3465 */ 3467 */
3466 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 3468 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
3467 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { 3469 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
@@ -3485,7 +3487,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3485 /* not a good idea to call discard here directly, 3487 /* not a good idea to call discard here directly,
3486 * but otherwise we'd need to call it every free() */ 3488 * but otherwise we'd need to call it every free() */
3487 ext4_discard_preallocations(inode); 3489 ext4_discard_preallocations(inode);
3488 ext4_free_blocks(handle, inode, 0, ext4_ext_pblock(&newex), 3490 ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex),
3489 ext4_ext_get_actual_len(&newex), 0); 3491 ext4_ext_get_actual_len(&newex), 0);
3490 goto out2; 3492 goto out2;
3491 } 3493 }
@@ -3525,6 +3527,8 @@ out2:
3525 ext4_ext_drop_refs(path); 3527 ext4_ext_drop_refs(path);
3526 kfree(path); 3528 kfree(path);
3527 } 3529 }
3530 trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
3531 newblock, map->m_len, err ? err : allocated);
3528 return err ? err : allocated; 3532 return err ? err : allocated;
3529} 3533}
3530 3534
@@ -3658,6 +3662,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
3658 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 3662 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
3659 return -EOPNOTSUPP; 3663 return -EOPNOTSUPP;
3660 3664
3665 trace_ext4_fallocate_enter(inode, offset, len, mode);
3661 map.m_lblk = offset >> blkbits; 3666 map.m_lblk = offset >> blkbits;
3662 /* 3667 /*
3663 * We can't just convert len to max_blocks because 3668 * We can't just convert len to max_blocks because
@@ -3673,6 +3678,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
3673 ret = inode_newsize_ok(inode, (len + offset)); 3678 ret = inode_newsize_ok(inode, (len + offset));
3674 if (ret) { 3679 if (ret) {
3675 mutex_unlock(&inode->i_mutex); 3680 mutex_unlock(&inode->i_mutex);
3681 trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
3676 return ret; 3682 return ret;
3677 } 3683 }
3678retry: 3684retry:
@@ -3717,6 +3723,8 @@ retry:
3717 goto retry; 3723 goto retry;
3718 } 3724 }
3719 mutex_unlock(&inode->i_mutex); 3725 mutex_unlock(&inode->i_mutex);
3726 trace_ext4_fallocate_exit(inode, offset, max_blocks,
3727 ret > 0 ? ret2 : ret);
3720 return ret > 0 ? ret2 : ret; 3728 return ret > 0 ? ret2 : ret;
3721} 3729}
3722 3730
@@ -3775,6 +3783,7 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
3775 } 3783 }
3776 return ret > 0 ? ret2 : ret; 3784 return ret > 0 ? ret2 : ret;
3777} 3785}
3786
3778/* 3787/*
3779 * Callback function called for each extent to gather FIEMAP information. 3788 * Callback function called for each extent to gather FIEMAP information.
3780 */ 3789 */
@@ -3782,38 +3791,162 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
3782 struct ext4_ext_cache *newex, struct ext4_extent *ex, 3791 struct ext4_ext_cache *newex, struct ext4_extent *ex,
3783 void *data) 3792 void *data)
3784{ 3793{
3785 struct fiemap_extent_info *fieinfo = data;
3786 unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
3787 __u64 logical; 3794 __u64 logical;
3788 __u64 physical; 3795 __u64 physical;
3789 __u64 length; 3796 __u64 length;
3797 loff_t size;
3790 __u32 flags = 0; 3798 __u32 flags = 0;
3791 int error; 3799 int ret = 0;
3800 struct fiemap_extent_info *fieinfo = data;
3801 unsigned char blksize_bits;
3792 3802
3793 logical = (__u64)newex->ec_block << blksize_bits; 3803 blksize_bits = inode->i_sb->s_blocksize_bits;
3804 logical = (__u64)newex->ec_block << blksize_bits;
3794 3805
3795 if (newex->ec_start == 0) { 3806 if (newex->ec_start == 0) {
3796 pgoff_t offset; 3807 /*
3797 struct page *page; 3808 * No extent in extent-tree contains block @newex->ec_start,
3809 * then the block may stay in 1)a hole or 2)delayed-extent.
3810 *
3811 * Holes or delayed-extents are processed as follows.
3812 * 1. lookup dirty pages with specified range in pagecache.
3813 * If no page is got, then there is no delayed-extent and
3814 * return with EXT_CONTINUE.
3815 * 2. find the 1st mapped buffer,
3816 * 3. check if the mapped buffer is both in the request range
3817 * and a delayed buffer. If not, there is no delayed-extent,
3818 * then return.
3819 * 4. a delayed-extent is found, the extent will be collected.
3820 */
3821 ext4_lblk_t end = 0;
3822 pgoff_t last_offset;
3823 pgoff_t offset;
3824 pgoff_t index;
3825 struct page **pages = NULL;
3798 struct buffer_head *bh = NULL; 3826 struct buffer_head *bh = NULL;
3827 struct buffer_head *head = NULL;
3828 unsigned int nr_pages = PAGE_SIZE / sizeof(struct page *);
3829
3830 pages = kmalloc(PAGE_SIZE, GFP_KERNEL);
3831 if (pages == NULL)
3832 return -ENOMEM;
3799 3833
3800 offset = logical >> PAGE_SHIFT; 3834 offset = logical >> PAGE_SHIFT;
3801 page = find_get_page(inode->i_mapping, offset); 3835repeat:
3802 if (!page || !page_has_buffers(page)) 3836 last_offset = offset;
3803 return EXT_CONTINUE; 3837 head = NULL;
3838 ret = find_get_pages_tag(inode->i_mapping, &offset,
3839 PAGECACHE_TAG_DIRTY, nr_pages, pages);
3840
3841 if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
3842 /* First time, try to find a mapped buffer. */
3843 if (ret == 0) {
3844out:
3845 for (index = 0; index < ret; index++)
3846 page_cache_release(pages[index]);
3847 /* just a hole. */
3848 kfree(pages);
3849 return EXT_CONTINUE;
3850 }
3804 3851
3805 bh = page_buffers(page); 3852 /* Try to find the 1st mapped buffer. */
3853 end = ((__u64)pages[0]->index << PAGE_SHIFT) >>
3854 blksize_bits;
3855 if (!page_has_buffers(pages[0]))
3856 goto out;
3857 head = page_buffers(pages[0]);
3858 if (!head)
3859 goto out;
3806 3860
3807 if (!bh) 3861 bh = head;
3808 return EXT_CONTINUE; 3862 do {
3863 if (buffer_mapped(bh)) {
3864 /* get the 1st mapped buffer. */
3865 if (end > newex->ec_block +
3866 newex->ec_len)
3867 /* The buffer is out of
3868 * the request range.
3869 */
3870 goto out;
3871 goto found_mapped_buffer;
3872 }
3873 bh = bh->b_this_page;
3874 end++;
3875 } while (bh != head);
3809 3876
3810 if (buffer_delay(bh)) { 3877 /* No mapped buffer found. */
3811 flags |= FIEMAP_EXTENT_DELALLOC; 3878 goto out;
3812 page_cache_release(page);
3813 } else { 3879 } else {
 3814 page_cache_release(page); 3880 /* Find contiguous delayed buffers. */
3815 return EXT_CONTINUE; 3881 if (ret > 0 && pages[0]->index == last_offset)
3882 head = page_buffers(pages[0]);
3883 bh = head;
3816 } 3884 }
3885
3886found_mapped_buffer:
3887 if (bh != NULL && buffer_delay(bh)) {
3888 /* 1st or contiguous delayed buffer found. */
3889 if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
3890 /*
3891 * 1st delayed buffer found, record
3892 * the start of extent.
3893 */
3894 flags |= FIEMAP_EXTENT_DELALLOC;
3895 newex->ec_block = end;
3896 logical = (__u64)end << blksize_bits;
3897 }
3898 /* Find contiguous delayed buffers. */
3899 do {
3900 if (!buffer_delay(bh))
3901 goto found_delayed_extent;
3902 bh = bh->b_this_page;
3903 end++;
3904 } while (bh != head);
3905
3906 for (index = 1; index < ret; index++) {
3907 if (!page_has_buffers(pages[index])) {
3908 bh = NULL;
3909 break;
3910 }
3911 head = page_buffers(pages[index]);
3912 if (!head) {
3913 bh = NULL;
3914 break;
3915 }
3916 if (pages[index]->index !=
3917 pages[0]->index + index) {
3918 /* Blocks are not contiguous. */
3919 bh = NULL;
3920 break;
3921 }
3922 bh = head;
3923 do {
3924 if (!buffer_delay(bh))
3925 /* Delayed-extent ends. */
3926 goto found_delayed_extent;
3927 bh = bh->b_this_page;
3928 end++;
3929 } while (bh != head);
3930 }
3931 } else if (!(flags & FIEMAP_EXTENT_DELALLOC))
3932 /* a hole found. */
3933 goto out;
3934
3935found_delayed_extent:
3936 newex->ec_len = min(end - newex->ec_block,
3937 (ext4_lblk_t)EXT_INIT_MAX_LEN);
3938 if (ret == nr_pages && bh != NULL &&
3939 newex->ec_len < EXT_INIT_MAX_LEN &&
3940 buffer_delay(bh)) {
3941 /* Have not collected an extent and continue. */
3942 for (index = 0; index < ret; index++)
3943 page_cache_release(pages[index]);
3944 goto repeat;
3945 }
3946
3947 for (index = 0; index < ret; index++)
3948 page_cache_release(pages[index]);
3949 kfree(pages);
3817 } 3950 }
3818 3951
3819 physical = (__u64)newex->ec_start << blksize_bits; 3952 physical = (__u64)newex->ec_start << blksize_bits;
@@ -3822,32 +3955,16 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
3822 if (ex && ext4_ext_is_uninitialized(ex)) 3955 if (ex && ext4_ext_is_uninitialized(ex))
3823 flags |= FIEMAP_EXTENT_UNWRITTEN; 3956 flags |= FIEMAP_EXTENT_UNWRITTEN;
3824 3957
3825 /* 3958 size = i_size_read(inode);
3826 * If this extent reaches EXT_MAX_BLOCK, it must be last. 3959 if (logical + length >= size)
3827 *
3828 * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK,
3829 * this also indicates no more allocated blocks.
3830 *
3831 * XXX this might miss a single-block extent at EXT_MAX_BLOCK
3832 */
3833 if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK ||
3834 newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) {
3835 loff_t size = i_size_read(inode);
3836 loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb);
3837
3838 flags |= FIEMAP_EXTENT_LAST; 3960 flags |= FIEMAP_EXTENT_LAST;
3839 if ((flags & FIEMAP_EXTENT_DELALLOC) &&
3840 logical+length > size)
3841 length = (size - logical + bs - 1) & ~(bs-1);
3842 }
3843 3961
3844 error = fiemap_fill_next_extent(fieinfo, logical, physical, 3962 ret = fiemap_fill_next_extent(fieinfo, logical, physical,
3845 length, flags); 3963 length, flags);
3846 if (error < 0) 3964 if (ret < 0)
3847 return error; 3965 return ret;
3848 if (error == 1) 3966 if (ret == 1)
3849 return EXT_BREAK; 3967 return EXT_BREAK;
3850
3851 return EXT_CONTINUE; 3968 return EXT_CONTINUE;
3852} 3969}
3853 3970
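Annotation: the rewritten ext4_ext_fiemap_cb() turns a cache miss into a delalloc scan: gather dirty pages, find the first mapped buffer, and extend over the contiguous run of delayed buffers. A condensed user-space model of steps 2-4, with a flag array standing in for pagecache pages and buffer_heads:

#include <stdio.h>

#define MAPPED  1
#define DELAYED 2

/* Returns length of the delayed extent starting at *start, or 0. */
static unsigned find_delayed_extent(const unsigned char *buf, unsigned n,
				    unsigned *start)
{
	unsigned i = 0, len = 0;

	while (i < n && !(buf[i] & MAPPED))	/* step 2: 1st mapped buffer */
		i++;
	if (i == n || !(buf[i] & DELAYED))	/* step 3: must be delayed */
		return 0;
	*start = i;				/* step 4: collect the run */
	while (i < n && (buf[i] & DELAYED)) {
		i++;
		len++;
	}
	return len;
}

int main(void)
{
	unsigned char blocks[] = { 0, 0, MAPPED | DELAYED, MAPPED | DELAYED,
				   MAPPED, 0 };
	unsigned start = 0, len;

	len = find_delayed_extent(blocks, 6, &start);
	printf("delayed extent: start=%u len=%u\n", start, len); /* 2, 2 */
	return 0;
}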
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 7829b287822a..e9473cbe80df 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -101,7 +101,7 @@ extern int ext4_flush_completed_IO(struct inode *inode)
101 * to the work-to-be schedule is freed. 101 * to the work-to-be schedule is freed.
102 * 102 *
103 * Thus we need to keep the io structure still valid here after 103 * Thus we need to keep the io structure still valid here after
104 * convertion finished. The io structure has a flag to 104 * conversion finished. The io structure has a flag to
105 * avoid double converting from both fsync and background work 105 * avoid double converting from both fsync and background work
106 * queue work. 106 * queue work.
107 */ 107 */
@@ -125,9 +125,11 @@ extern int ext4_flush_completed_IO(struct inode *inode)
125 * the parent directory's parent as well, and so on recursively, if 125 * the parent directory's parent as well, and so on recursively, if
126 * they are also freshly created. 126 * they are also freshly created.
127 */ 127 */
128static void ext4_sync_parent(struct inode *inode) 128static int ext4_sync_parent(struct inode *inode)
129{ 129{
130 struct writeback_control wbc;
130 struct dentry *dentry = NULL; 131 struct dentry *dentry = NULL;
132 int ret = 0;
131 133
132 while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { 134 while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
133 ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY); 135 ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
@@ -136,8 +138,17 @@ static void ext4_sync_parent(struct inode *inode)
136 if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode) 138 if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode)
137 break; 139 break;
138 inode = dentry->d_parent->d_inode; 140 inode = dentry->d_parent->d_inode;
139 sync_mapping_buffers(inode->i_mapping); 141 ret = sync_mapping_buffers(inode->i_mapping);
142 if (ret)
143 break;
144 memset(&wbc, 0, sizeof(wbc));
145 wbc.sync_mode = WB_SYNC_ALL;
146 wbc.nr_to_write = 0; /* only write out the inode */
147 ret = sync_inode(inode, &wbc);
148 if (ret)
149 break;
140 } 150 }
151 return ret;
141} 152}
142 153
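Annotation: ext4_sync_parent() now propagates errors and writes each freshly created ancestor's inode out with a WB_SYNC_ALL writeback_control instead of only syncing its mapping buffers. The walk itself, modeled on a toy tree (no VFS dentries):

#include <stdio.h>

struct node {
	const char *name;
	int is_new;		/* EXT4_STATE_NEWENTRY analogue */
	struct node *parent;
};

static int sync_node(struct node *n)
{
	printf("syncing %s\n", n->name);
	return 0;		/* pretend the write-out succeeded */
}

static int sync_parents(struct node *inode)
{
	int ret = 0;

	while (inode && inode->is_new) {
		inode->is_new = 0;	/* clear so each dir syncs once */
		inode = inode->parent;
		if (!inode)
			break;
		ret = sync_node(inode);	/* stop on the first error */
		if (ret)
			break;
	}
	return ret;
}

int main(void)
{
	struct node root = { "/", 0, NULL };
	struct node dir = { "/new-dir", 1, &root };
	struct node file = { "/new-dir/file", 1, &dir };

	return sync_parents(&file);
}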
143/* 154/*
@@ -164,20 +175,20 @@ int ext4_sync_file(struct file *file, int datasync)
164 175
165 J_ASSERT(ext4_journal_current_handle() == NULL); 176 J_ASSERT(ext4_journal_current_handle() == NULL);
166 177
167 trace_ext4_sync_file(file, datasync); 178 trace_ext4_sync_file_enter(file, datasync);
168 179
169 if (inode->i_sb->s_flags & MS_RDONLY) 180 if (inode->i_sb->s_flags & MS_RDONLY)
170 return 0; 181 return 0;
171 182
172 ret = ext4_flush_completed_IO(inode); 183 ret = ext4_flush_completed_IO(inode);
173 if (ret < 0) 184 if (ret < 0)
174 return ret; 185 goto out;
175 186
176 if (!journal) { 187 if (!journal) {
177 ret = generic_file_fsync(file, datasync); 188 ret = generic_file_fsync(file, datasync);
178 if (!ret && !list_empty(&inode->i_dentry)) 189 if (!ret && !list_empty(&inode->i_dentry))
179 ext4_sync_parent(inode); 190 ret = ext4_sync_parent(inode);
180 return ret; 191 goto out;
181 } 192 }
182 193
183 /* 194 /*
@@ -194,8 +205,10 @@ int ext4_sync_file(struct file *file, int datasync)
194 * (they were dirtied by commit). But that's OK - the blocks are 205 * (they were dirtied by commit). But that's OK - the blocks are
195 * safe in-journal, which is all fsync() needs to ensure. 206 * safe in-journal, which is all fsync() needs to ensure.
196 */ 207 */
197 if (ext4_should_journal_data(inode)) 208 if (ext4_should_journal_data(inode)) {
198 return ext4_force_commit(inode->i_sb); 209 ret = ext4_force_commit(inode->i_sb);
210 goto out;
211 }
199 212
200 commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; 213 commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
201 if (jbd2_log_start_commit(journal, commit_tid)) { 214 if (jbd2_log_start_commit(journal, commit_tid)) {
@@ -215,5 +228,7 @@ int ext4_sync_file(struct file *file, int datasync)
215 ret = jbd2_log_wait_commit(journal, commit_tid); 228 ret = jbd2_log_wait_commit(journal, commit_tid);
216 } else if (journal->j_flags & JBD2_BARRIER) 229 } else if (journal->j_flags & JBD2_BARRIER)
217 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); 230 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
231 out:
232 trace_ext4_sync_file_exit(inode, ret);
218 return ret; 233 return ret;
219} 234}
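
For the journaled case, fsync reduces to "ensure transaction commit_tid has committed, otherwise flush the volatile disk cache". A sketch of that pattern, assuming only the jbd2 calls visible in this hunk (the helper name is hypothetical and error handling is trimmed):

	static int fsync_wait_for_commit(journal_t *journal, tid_t commit_tid,
					 struct super_block *sb)
	{
		int ret = 0;

		if (jbd2_log_start_commit(journal, commit_tid))
			/* not yet committed: start the commit and wait */
			ret = jbd2_log_wait_commit(journal, commit_tid);
		else if (journal->j_flags & JBD2_BARRIER)
			/* already committed: only a cache flush remains */
			blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
		return ret;
	}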
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 78b79e1bd7ed..21bb2f61e502 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -152,6 +152,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
152 * We do it here so the bitmap uptodate bit 152 * We do it here so the bitmap uptodate bit
153 * get set with buffer lock held. 153 * get set with buffer lock held.
154 */ 154 */
155 trace_ext4_load_inode_bitmap(sb, block_group);
155 set_bitmap_uptodate(bh); 156 set_bitmap_uptodate(bh);
156 if (bh_submit_read(bh) < 0) { 157 if (bh_submit_read(bh) < 0) {
157 put_bh(bh); 158 put_bh(bh);
@@ -649,7 +650,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
649 *group = parent_group + flex_size; 650 *group = parent_group + flex_size;
650 if (*group > ngroups) 651 if (*group > ngroups)
651 *group = 0; 652 *group = 0;
652 return find_group_orlov(sb, parent, group, mode, 0); 653 return find_group_orlov(sb, parent, group, mode, NULL);
653 } 654 }
654 655
655 /* 656 /*
@@ -1054,6 +1055,11 @@ got:
1054 } 1055 }
1055 } 1056 }
1056 1057
1058 if (ext4_handle_valid(handle)) {
1059 ei->i_sync_tid = handle->h_transaction->t_tid;
1060 ei->i_datasync_tid = handle->h_transaction->t_tid;
1061 }
1062
1057 err = ext4_mark_inode_dirty(handle, inode); 1063 err = ext4_mark_inode_dirty(handle, inode);
1058 if (err) { 1064 if (err) {
1059 ext4_std_error(sb, err); 1065 ext4_std_error(sb, err);
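
This hunk seeds i_sync_tid and i_datasync_tid on the freshly allocated inode with the creating transaction's tid, and those are exactly the fields the fsync hunk above reads when choosing commit_tid. A sketch of the pairing; the helper name is hypothetical, the fields and calls are the ones in the patch:

	static void record_creation_tid(handle_t *handle,
					struct ext4_inode_info *ei)
	{
		if (ext4_handle_valid(handle)) {
			ei->i_sync_tid = handle->h_transaction->t_tid;
			ei->i_datasync_tid = handle->h_transaction->t_tid;
		}
	}

	/* consumer side, in ext4_sync_file():
	 *	commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
	 * so a datasync waits only for the transaction that last
	 * touched the data blocks. */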
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9f7f9e49914f..f2fa5e8a582c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -173,7 +173,7 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
173 BUG_ON(EXT4_JOURNAL(inode) == NULL); 173 BUG_ON(EXT4_JOURNAL(inode) == NULL);
174 jbd_debug(2, "restarting handle %p\n", handle); 174 jbd_debug(2, "restarting handle %p\n", handle);
175 up_write(&EXT4_I(inode)->i_data_sem); 175 up_write(&EXT4_I(inode)->i_data_sem);
176 ret = ext4_journal_restart(handle, blocks_for_truncate(inode)); 176 ret = ext4_journal_restart(handle, nblocks);
177 down_write(&EXT4_I(inode)->i_data_sem); 177 down_write(&EXT4_I(inode)->i_data_sem);
178 ext4_discard_preallocations(inode); 178 ext4_discard_preallocations(inode);
179 179
@@ -720,7 +720,7 @@ allocated:
720 return ret; 720 return ret;
721failed_out: 721failed_out:
722 for (i = 0; i < index; i++) 722 for (i = 0; i < index; i++)
723 ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0); 723 ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
724 return ret; 724 return ret;
725} 725}
726 726
@@ -823,20 +823,20 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
823 return err; 823 return err;
824failed: 824failed:
825 /* Allocation failed, free what we already allocated */ 825 /* Allocation failed, free what we already allocated */
826 ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0); 826 ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0);
827 for (i = 1; i <= n ; i++) { 827 for (i = 1; i <= n ; i++) {
828 /* 828 /*
829 * branch[i].bh is newly allocated, so there is no 829 * branch[i].bh is newly allocated, so there is no
830 * need to revoke the block, which is why we don't 830 * need to revoke the block, which is why we don't
831 * need to set EXT4_FREE_BLOCKS_METADATA. 831 * need to set EXT4_FREE_BLOCKS_METADATA.
832 */ 832 */
833 ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 833 ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1,
834 EXT4_FREE_BLOCKS_FORGET); 834 EXT4_FREE_BLOCKS_FORGET);
835 } 835 }
836 for (i = n+1; i < indirect_blks; i++) 836 for (i = n+1; i < indirect_blks; i++)
837 ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0); 837 ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
838 838
839 ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0); 839 ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0);
840 840
841 return err; 841 return err;
842} 842}
@@ -924,7 +924,7 @@ err_out:
924 ext4_free_blocks(handle, inode, where[i].bh, 0, 1, 924 ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
925 EXT4_FREE_BLOCKS_FORGET); 925 EXT4_FREE_BLOCKS_FORGET);
926 } 926 }
927 ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key), 927 ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key),
928 blks, 0); 928 blks, 0);
929 929
930 return err; 930 return err;
@@ -973,6 +973,7 @@ static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
973 int count = 0; 973 int count = 0;
974 ext4_fsblk_t first_block = 0; 974 ext4_fsblk_t first_block = 0;
975 975
976 trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
976 J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); 977 J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
977 J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); 978 J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
978 depth = ext4_block_to_path(inode, map->m_lblk, offsets, 979 depth = ext4_block_to_path(inode, map->m_lblk, offsets,
@@ -1058,6 +1059,8 @@ cleanup:
1058 partial--; 1059 partial--;
1059 } 1060 }
1060out: 1061out:
1062 trace_ext4_ind_map_blocks_exit(inode, map->m_lblk,
1063 map->m_pblk, map->m_len, err);
1061 return err; 1064 return err;
1062} 1065}
1063 1066
@@ -2060,7 +2063,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
2060 if (nr_pages == 0) 2063 if (nr_pages == 0)
2061 break; 2064 break;
2062 for (i = 0; i < nr_pages; i++) { 2065 for (i = 0; i < nr_pages; i++) {
2063 int commit_write = 0, redirty_page = 0; 2066 int commit_write = 0, skip_page = 0;
2064 struct page *page = pvec.pages[i]; 2067 struct page *page = pvec.pages[i];
2065 2068
2066 index = page->index; 2069 index = page->index;
@@ -2086,14 +2089,12 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
2086 * If the page does not have buffers (for 2089 * If the page does not have buffers (for
2087 * whatever reason), try to create them using 2090 * whatever reason), try to create them using
2088 * __block_write_begin. If this fails, 2091 * __block_write_begin. If this fails,
2089 * redirty the page and move on. 2092 * skip the page and move on.
2090 */ 2093 */
2091 if (!page_has_buffers(page)) { 2094 if (!page_has_buffers(page)) {
2092 if (__block_write_begin(page, 0, len, 2095 if (__block_write_begin(page, 0, len,
2093 noalloc_get_block_write)) { 2096 noalloc_get_block_write)) {
2094 redirty_page: 2097 skip_page:
2095 redirty_page_for_writepage(mpd->wbc,
2096 page);
2097 unlock_page(page); 2098 unlock_page(page);
2098 continue; 2099 continue;
2099 } 2100 }
@@ -2104,7 +2105,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
2104 block_start = 0; 2105 block_start = 0;
2105 do { 2106 do {
2106 if (!bh) 2107 if (!bh)
2107 goto redirty_page; 2108 goto skip_page;
2108 if (map && (cur_logical >= map->m_lblk) && 2109 if (map && (cur_logical >= map->m_lblk) &&
2109 (cur_logical <= (map->m_lblk + 2110 (cur_logical <= (map->m_lblk +
2110 (map->m_len - 1)))) { 2111 (map->m_len - 1)))) {
@@ -2120,22 +2121,23 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
2120 clear_buffer_unwritten(bh); 2121 clear_buffer_unwritten(bh);
2121 } 2122 }
2122 2123
2123 /* redirty page if block allocation undone */ 2124 /* skip page if block allocation undone */
2124 if (buffer_delay(bh) || buffer_unwritten(bh)) 2125 if (buffer_delay(bh) || buffer_unwritten(bh))
2125 redirty_page = 1; 2126 skip_page = 1;
2126 bh = bh->b_this_page; 2127 bh = bh->b_this_page;
2127 block_start += bh->b_size; 2128 block_start += bh->b_size;
2128 cur_logical++; 2129 cur_logical++;
2129 pblock++; 2130 pblock++;
2130 } while (bh != page_bufs); 2131 } while (bh != page_bufs);
2131 2132
2132 if (redirty_page) 2133 if (skip_page)
2133 goto redirty_page; 2134 goto skip_page;
2134 2135
2135 if (commit_write) 2136 if (commit_write)
2136 /* mark the buffer_heads as dirty & uptodate */ 2137 /* mark the buffer_heads as dirty & uptodate */
2137 block_commit_write(page, 0, len); 2138 block_commit_write(page, 0, len);
2138 2139
2140 clear_page_dirty_for_io(page);
2139 /* 2141 /*
2140 * Delalloc doesn't support data journalling, 2142 * Delalloc doesn't support data journalling,
2141 * but eventually maybe we'll lift this 2143 * but eventually maybe we'll lift this
@@ -2165,8 +2167,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
2165 return ret; 2167 return ret;
2166} 2168}
2167 2169
2168static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, 2170static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd)
2169 sector_t logical, long blk_cnt)
2170{ 2171{
2171 int nr_pages, i; 2172 int nr_pages, i;
2172 pgoff_t index, end; 2173 pgoff_t index, end;
@@ -2174,9 +2175,8 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
2174 struct inode *inode = mpd->inode; 2175 struct inode *inode = mpd->inode;
2175 struct address_space *mapping = inode->i_mapping; 2176 struct address_space *mapping = inode->i_mapping;
2176 2177
2177 index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits); 2178 index = mpd->first_page;
2178 end = (logical + blk_cnt - 1) >> 2179 end = mpd->next_page - 1;
2179 (PAGE_CACHE_SHIFT - inode->i_blkbits);
2180 while (index <= end) { 2180 while (index <= end) {
2181 nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); 2181 nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
2182 if (nr_pages == 0) 2182 if (nr_pages == 0)
@@ -2279,9 +2279,8 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
2279 err = blks; 2279 err = blks;
2280 /* 2280 /*
2281 * If get block returns EAGAIN or ENOSPC and there 2281 * If get block returns EAGAIN or ENOSPC and there
2282 * appears to be free blocks we will call 2282 * appears to be free blocks we will just let
2283 * ext4_writepage() for all of the pages which will 2283 * mpage_da_submit_io() unlock all of the pages.
2284 * just redirty the pages.
2285 */ 2284 */
2286 if (err == -EAGAIN) 2285 if (err == -EAGAIN)
2287 goto submit_io; 2286 goto submit_io;
@@ -2312,8 +2311,10 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
2312 ext4_print_free_blocks(mpd->inode); 2311 ext4_print_free_blocks(mpd->inode);
2313 } 2312 }
2314 /* invalidate all the pages */ 2313 /* invalidate all the pages */
2315 ext4_da_block_invalidatepages(mpd, next, 2314 ext4_da_block_invalidatepages(mpd);
2316 mpd->b_size >> mpd->inode->i_blkbits); 2315
2316 /* Mark this page range as having been completed */
2317 mpd->io_done = 1;
2317 return; 2318 return;
2318 } 2319 }
2319 BUG_ON(blks == 0); 2320 BUG_ON(blks == 0);
@@ -2438,102 +2439,6 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
2438} 2439}
2439 2440
2440/* 2441/*
2441 * __mpage_da_writepage - finds extent of pages and blocks
2442 *
2443 * @page: page to consider
2444 * @wbc: not used, we just follow rules
2445 * @data: context
2446 *
2447 * The function finds extents of pages and scan them for all blocks.
2448 */
2449static int __mpage_da_writepage(struct page *page,
2450 struct writeback_control *wbc,
2451 struct mpage_da_data *mpd)
2452{
2453 struct inode *inode = mpd->inode;
2454 struct buffer_head *bh, *head;
2455 sector_t logical;
2456
2457 /*
2458 * Can we merge this page to current extent?
2459 */
2460 if (mpd->next_page != page->index) {
2461 /*
2462 * Nope, we can't. So, we map non-allocated blocks
2463 * and start IO on them
2464 */
2465 if (mpd->next_page != mpd->first_page) {
2466 mpage_da_map_and_submit(mpd);
2467 /*
2468 * skip rest of the page in the page_vec
2469 */
2470 redirty_page_for_writepage(wbc, page);
2471 unlock_page(page);
2472 return MPAGE_DA_EXTENT_TAIL;
2473 }
2474
2475 /*
2476 * Start next extent of pages ...
2477 */
2478 mpd->first_page = page->index;
2479
2480 /*
2481 * ... and blocks
2482 */
2483 mpd->b_size = 0;
2484 mpd->b_state = 0;
2485 mpd->b_blocknr = 0;
2486 }
2487
2488 mpd->next_page = page->index + 1;
2489 logical = (sector_t) page->index <<
2490 (PAGE_CACHE_SHIFT - inode->i_blkbits);
2491
2492 if (!page_has_buffers(page)) {
2493 mpage_add_bh_to_extent(mpd, logical, PAGE_CACHE_SIZE,
2494 (1 << BH_Dirty) | (1 << BH_Uptodate));
2495 if (mpd->io_done)
2496 return MPAGE_DA_EXTENT_TAIL;
2497 } else {
2498 /*
2499 * Page with regular buffer heads, just add all dirty ones
2500 */
2501 head = page_buffers(page);
2502 bh = head;
2503 do {
2504 BUG_ON(buffer_locked(bh));
2505 /*
2506 * We need to try to allocate
2507 * unmapped blocks in the same page.
2508 * Otherwise we won't make progress
2509 * with the page in ext4_writepage
2510 */
2511 if (ext4_bh_delay_or_unwritten(NULL, bh)) {
2512 mpage_add_bh_to_extent(mpd, logical,
2513 bh->b_size,
2514 bh->b_state);
2515 if (mpd->io_done)
2516 return MPAGE_DA_EXTENT_TAIL;
2517 } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
2518 /*
2519 * mapped dirty buffer. We need to update
2520 * the b_state because we look at
2521 * b_state in mpage_da_map_blocks. We don't
2522 * update b_size because if we find an
2523 * unmapped buffer_head later we need to
2524 * use the b_state flag of that buffer_head.
2525 */
2526 if (mpd->b_size == 0)
2527 mpd->b_state = bh->b_state & BH_FLAGS;
2528 }
2529 logical++;
2530 } while ((bh = bh->b_this_page) != head);
2531 }
2532
2533 return 0;
2534}
2535
2536/*
2537 * This is a special get_blocks_t callback which is used by 2442 * This is a special get_blocks_t callback which is used by
2538 * ext4_da_write_begin(). It will either return mapped block or 2443 * ext4_da_write_begin(). It will either return mapped block or
2539 * reserve space for a single block. 2444 * reserve space for a single block.
@@ -2684,7 +2589,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
2684 * because we should have holes filled from ext4_page_mkwrite(). We even don't 2589 * because we should have holes filled from ext4_page_mkwrite(). We even don't
2685 * need to file the inode to the transaction's list in ordered mode because if 2590 * need to file the inode to the transaction's list in ordered mode because if
2686 * we are writing back data added by write(), the inode is already there and if 2591 * we are writing back data added by write(), the inode is already there and if
2687 * we are writing back data modified via mmap(), noone guarantees in which 2592 * we are writing back data modified via mmap(), no one guarantees in which
2688 * transaction the data will hit the disk. In case we are journaling data, we 2593 * transaction the data will hit the disk. In case we are journaling data, we
2689 * cannot start transaction directly because transaction start ranks above page 2594 * cannot start transaction directly because transaction start ranks above page
2690 * lock so we have to do some magic. 2595 * lock so we have to do some magic.
@@ -2786,7 +2691,7 @@ static int ext4_writepage(struct page *page,
2786 2691
2787/* 2692/*
2788 * This is called via ext4_da_writepages() to 2693 * This is called via ext4_da_writepages() to
2789 * calulate the total number of credits to reserve to fit 2694 * calculate the total number of credits to reserve to fit
2790 * a single extent allocation into a single transaction, 2695 * a single extent allocation into a single transaction,
 2791 * ext4_da_writepages() will loop calling this before 2696 * ext4_da_writepages() will loop calling this before
2792 * the block allocation. 2697 * the block allocation.
@@ -2811,27 +2716,27 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
2811 2716
2812/* 2717/*
2813 * write_cache_pages_da - walk the list of dirty pages of the given 2718 * write_cache_pages_da - walk the list of dirty pages of the given
2814 * address space and call the callback function (which usually writes 2719 * address space and accumulate pages that need writing, and call
2815 * the pages). 2720 * mpage_da_map_and_submit to map a single contiguous memory region
2816 * 2721 * and then write them.
2817 * This is a forked version of write_cache_pages(). Differences:
2818 * Range cyclic is ignored.
2819 * no_nrwrite_index_update is always presumed true
2820 */ 2722 */
2821static int write_cache_pages_da(struct address_space *mapping, 2723static int write_cache_pages_da(struct address_space *mapping,
2822 struct writeback_control *wbc, 2724 struct writeback_control *wbc,
2823 struct mpage_da_data *mpd, 2725 struct mpage_da_data *mpd,
2824 pgoff_t *done_index) 2726 pgoff_t *done_index)
2825{ 2727{
2826 int ret = 0; 2728 struct buffer_head *bh, *head;
2827 int done = 0; 2729 struct inode *inode = mapping->host;
2828 struct pagevec pvec; 2730 struct pagevec pvec;
2829 unsigned nr_pages; 2731 unsigned int nr_pages;
2830 pgoff_t index; 2732 sector_t logical;
2831 pgoff_t end; /* Inclusive */ 2733 pgoff_t index, end;
2832 long nr_to_write = wbc->nr_to_write; 2734 long nr_to_write = wbc->nr_to_write;
2833 int tag; 2735 int i, tag, ret = 0;
2834 2736
2737 memset(mpd, 0, sizeof(struct mpage_da_data));
2738 mpd->wbc = wbc;
2739 mpd->inode = inode;
2835 pagevec_init(&pvec, 0); 2740 pagevec_init(&pvec, 0);
2836 index = wbc->range_start >> PAGE_CACHE_SHIFT; 2741 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2837 end = wbc->range_end >> PAGE_CACHE_SHIFT; 2742 end = wbc->range_end >> PAGE_CACHE_SHIFT;
@@ -2842,13 +2747,11 @@ static int write_cache_pages_da(struct address_space *mapping,
2842 tag = PAGECACHE_TAG_DIRTY; 2747 tag = PAGECACHE_TAG_DIRTY;
2843 2748
2844 *done_index = index; 2749 *done_index = index;
2845 while (!done && (index <= end)) { 2750 while (index <= end) {
2846 int i;
2847
2848 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, 2751 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
2849 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); 2752 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
2850 if (nr_pages == 0) 2753 if (nr_pages == 0)
2851 break; 2754 return 0;
2852 2755
2853 for (i = 0; i < nr_pages; i++) { 2756 for (i = 0; i < nr_pages; i++) {
2854 struct page *page = pvec.pages[i]; 2757 struct page *page = pvec.pages[i];
@@ -2860,60 +2763,100 @@ static int write_cache_pages_da(struct address_space *mapping,
2860 * mapping. However, page->index will not change 2763 * mapping. However, page->index will not change
2861 * because we have a reference on the page. 2764 * because we have a reference on the page.
2862 */ 2765 */
2863 if (page->index > end) { 2766 if (page->index > end)
2864 done = 1; 2767 goto out;
2865 break;
2866 }
2867 2768
2868 *done_index = page->index + 1; 2769 *done_index = page->index + 1;
2869 2770
2771 /*
2772 * If we can't merge this page, and we have
 2773 * accumulated a contiguous region, write it
2774 */
2775 if ((mpd->next_page != page->index) &&
2776 (mpd->next_page != mpd->first_page)) {
2777 mpage_da_map_and_submit(mpd);
2778 goto ret_extent_tail;
2779 }
2780
2870 lock_page(page); 2781 lock_page(page);
2871 2782
2872 /* 2783 /*
2873 * Page truncated or invalidated. We can freely skip it 2784 * If the page is no longer dirty, or its
 2874 * then, even for data integrity operations: the page 2785 * mapping no longer corresponds to the inode we
2875 * has disappeared concurrently, so there could be no 2786 * are writing (which means it has been
2876 * real expectation of this data interity operation 2787 * truncated or invalidated), or the page is
2877 * even if there is now a new, dirty page at the same 2788 * already under writeback and we are not
2878 * pagecache address. 2789 * doing a data integrity writeback, skip the page
2879 */ 2790 */
2880 if (unlikely(page->mapping != mapping)) { 2791 if (!PageDirty(page) ||
2881continue_unlock: 2792 (PageWriteback(page) &&
2793 (wbc->sync_mode == WB_SYNC_NONE)) ||
2794 unlikely(page->mapping != mapping)) {
2882 unlock_page(page); 2795 unlock_page(page);
2883 continue; 2796 continue;
2884 } 2797 }
2885 2798
2886 if (!PageDirty(page)) { 2799 if (PageWriteback(page))
2887 /* someone wrote it for us */ 2800 wait_on_page_writeback(page);
2888 goto continue_unlock;
2889 }
2890
2891 if (PageWriteback(page)) {
2892 if (wbc->sync_mode != WB_SYNC_NONE)
2893 wait_on_page_writeback(page);
2894 else
2895 goto continue_unlock;
2896 }
2897 2801
2898 BUG_ON(PageWriteback(page)); 2802 BUG_ON(PageWriteback(page));
2899 if (!clear_page_dirty_for_io(page))
2900 goto continue_unlock;
2901 2803
2902 ret = __mpage_da_writepage(page, wbc, mpd); 2804 if (mpd->next_page != page->index)
2903 if (unlikely(ret)) { 2805 mpd->first_page = page->index;
2904 if (ret == AOP_WRITEPAGE_ACTIVATE) { 2806 mpd->next_page = page->index + 1;
2905 unlock_page(page); 2807 logical = (sector_t) page->index <<
2906 ret = 0; 2808 (PAGE_CACHE_SHIFT - inode->i_blkbits);
2907 } else { 2809
2908 done = 1; 2810 if (!page_has_buffers(page)) {
2909 break; 2811 mpage_add_bh_to_extent(mpd, logical,
2910 } 2812 PAGE_CACHE_SIZE,
2813 (1 << BH_Dirty) | (1 << BH_Uptodate));
2814 if (mpd->io_done)
2815 goto ret_extent_tail;
2816 } else {
2817 /*
2818 * Page with regular buffer heads,
2819 * just add all dirty ones
2820 */
2821 head = page_buffers(page);
2822 bh = head;
2823 do {
2824 BUG_ON(buffer_locked(bh));
2825 /*
2826 * We need to try to allocate
2827 * unmapped blocks in the same page.
2828 * Otherwise we won't make progress
2829 * with the page in ext4_writepage
2830 */
2831 if (ext4_bh_delay_or_unwritten(NULL, bh)) {
2832 mpage_add_bh_to_extent(mpd, logical,
2833 bh->b_size,
2834 bh->b_state);
2835 if (mpd->io_done)
2836 goto ret_extent_tail;
2837 } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
2838 /*
2839 * mapped dirty buffer. We need
2840 * to update the b_state
2841 * because we look at b_state
2842 * in mpage_da_map_blocks. We
2843 * don't update b_size because
2844 * if we find an unmapped
2845 * buffer_head later we need to
2846 * use the b_state flag of that
2847 * buffer_head.
2848 */
2849 if (mpd->b_size == 0)
2850 mpd->b_state = bh->b_state & BH_FLAGS;
2851 }
2852 logical++;
2853 } while ((bh = bh->b_this_page) != head);
2911 } 2854 }
2912 2855
2913 if (nr_to_write > 0) { 2856 if (nr_to_write > 0) {
2914 nr_to_write--; 2857 nr_to_write--;
2915 if (nr_to_write == 0 && 2858 if (nr_to_write == 0 &&
2916 wbc->sync_mode == WB_SYNC_NONE) { 2859 wbc->sync_mode == WB_SYNC_NONE)
2917 /* 2860 /*
2918 * We stop writing back only if we are 2861 * We stop writing back only if we are
2919 * not doing integrity sync. In case of 2862 * not doing integrity sync. In case of
@@ -2924,14 +2867,18 @@ continue_unlock:
2924 * pages, but have not synced all of the 2867 * pages, but have not synced all of the
2925 * old dirty pages. 2868 * old dirty pages.
2926 */ 2869 */
2927 done = 1; 2870 goto out;
2928 break;
2929 }
2930 } 2871 }
2931 } 2872 }
2932 pagevec_release(&pvec); 2873 pagevec_release(&pvec);
2933 cond_resched(); 2874 cond_resched();
2934 } 2875 }
2876 return 0;
2877ret_extent_tail:
2878 ret = MPAGE_DA_EXTENT_TAIL;
2879out:
2880 pagevec_release(&pvec);
2881 cond_resched();
2935 return ret; 2882 return ret;
2936} 2883}
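
The rewritten write_cache_pages_da() folds the old __mpage_da_writepage() callback into one loop: pages found by the dirty tag are merged into a single contiguous extent, and the extent is flushed through mpage_da_map_and_submit() as soon as a page cannot be merged. A self-contained userspace sketch of that accumulate-and-flush shape (plain indices stand in for pages; flush() stands in for mpage_da_map_and_submit()):

	#include <stdio.h>

	static void flush(unsigned long first, unsigned long next)
	{
		if (next != first)	/* mirrors mpd->next_page != mpd->first_page */
			printf("submit pages %lu..%lu\n", first, next - 1);
	}

	static void accumulate(const unsigned long *idx, int n)
	{
		unsigned long first = 0, next = 0;
		int i;

		for (i = 0; i < n; i++) {
			if (next != idx[i]) {	/* gap: write the old run */
				flush(first, next);
				first = idx[i];
			}
			next = idx[i] + 1;
		}
		flush(first, next);		/* final run */
	}

	int main(void)
	{
		unsigned long pages[] = { 3, 4, 5, 9, 10, 42 };

		accumulate(pages, 6);	/* submits 3..5, 9..10, 42 */
		return 0;
	}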
2937 2884
@@ -2945,7 +2892,6 @@ static int ext4_da_writepages(struct address_space *mapping,
2945 struct mpage_da_data mpd; 2892 struct mpage_da_data mpd;
2946 struct inode *inode = mapping->host; 2893 struct inode *inode = mapping->host;
2947 int pages_written = 0; 2894 int pages_written = 0;
2948 long pages_skipped;
2949 unsigned int max_pages; 2895 unsigned int max_pages;
2950 int range_cyclic, cycled = 1, io_done = 0; 2896 int range_cyclic, cycled = 1, io_done = 0;
2951 int needed_blocks, ret = 0; 2897 int needed_blocks, ret = 0;
@@ -3028,11 +2974,6 @@ static int ext4_da_writepages(struct address_space *mapping,
3028 wbc->nr_to_write = desired_nr_to_write; 2974 wbc->nr_to_write = desired_nr_to_write;
3029 } 2975 }
3030 2976
3031 mpd.wbc = wbc;
3032 mpd.inode = mapping->host;
3033
3034 pages_skipped = wbc->pages_skipped;
3035
3036retry: 2977retry:
3037 if (wbc->sync_mode == WB_SYNC_ALL) 2978 if (wbc->sync_mode == WB_SYNC_ALL)
3038 tag_pages_for_writeback(mapping, index, end); 2979 tag_pages_for_writeback(mapping, index, end);
@@ -3059,22 +3000,10 @@ retry:
3059 } 3000 }
3060 3001
3061 /* 3002 /*
3062 * Now call __mpage_da_writepage to find the next 3003 * Now call write_cache_pages_da() to find the next
3063 * contiguous region of logical blocks that need 3004 * contiguous region of logical blocks that need
3064 * blocks to be allocated by ext4. We don't actually 3005 * blocks to be allocated by ext4 and submit them.
3065 * submit the blocks for I/O here, even though
3066 * write_cache_pages thinks it will, and will set the
3067 * pages as clean for write before calling
3068 * __mpage_da_writepage().
3069 */ 3006 */
3070 mpd.b_size = 0;
3071 mpd.b_state = 0;
3072 mpd.b_blocknr = 0;
3073 mpd.first_page = 0;
3074 mpd.next_page = 0;
3075 mpd.io_done = 0;
3076 mpd.pages_written = 0;
3077 mpd.retval = 0;
3078 ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index); 3007 ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index);
3079 /* 3008 /*
3080 * If we have a contiguous extent of pages and we 3009 * If we have a contiguous extent of pages and we
@@ -3096,7 +3025,6 @@ retry:
3096 * and try again 3025 * and try again
3097 */ 3026 */
3098 jbd2_journal_force_commit_nested(sbi->s_journal); 3027 jbd2_journal_force_commit_nested(sbi->s_journal);
3099 wbc->pages_skipped = pages_skipped;
3100 ret = 0; 3028 ret = 0;
3101 } else if (ret == MPAGE_DA_EXTENT_TAIL) { 3029 } else if (ret == MPAGE_DA_EXTENT_TAIL) {
3102 /* 3030 /*
@@ -3104,7 +3032,6 @@ retry:
3104 * rest of the pages 3032 * rest of the pages
3105 */ 3033 */
3106 pages_written += mpd.pages_written; 3034 pages_written += mpd.pages_written;
3107 wbc->pages_skipped = pages_skipped;
3108 ret = 0; 3035 ret = 0;
3109 io_done = 1; 3036 io_done = 1;
3110 } else if (wbc->nr_to_write) 3037 } else if (wbc->nr_to_write)
@@ -3122,11 +3049,6 @@ retry:
3122 wbc->range_end = mapping->writeback_index - 1; 3049 wbc->range_end = mapping->writeback_index - 1;
3123 goto retry; 3050 goto retry;
3124 } 3051 }
3125 if (pages_skipped != wbc->pages_skipped)
3126 ext4_msg(inode->i_sb, KERN_CRIT,
3127 "This should not happen leaving %s "
3128 "with nr_to_write = %ld ret = %d",
3129 __func__, wbc->nr_to_write, ret);
3130 3052
3131 /* Update index */ 3053 /* Update index */
3132 wbc->range_cyclic = range_cyclic; 3054 wbc->range_cyclic = range_cyclic;
@@ -3383,7 +3305,7 @@ int ext4_alloc_da_blocks(struct inode *inode)
3383 * the pages by calling redirty_page_for_writepage() but that 3305 * the pages by calling redirty_page_for_writepage() but that
3384 * would be ugly in the extreme. So instead we would need to 3306 * would be ugly in the extreme. So instead we would need to
3385 * replicate parts of the code in the above functions, 3307 * replicate parts of the code in the above functions,
3386 * simplifying them becuase we wouldn't actually intend to 3308 * simplifying them because we wouldn't actually intend to
3387 * write out the pages, but rather only collect contiguous 3309 * write out the pages, but rather only collect contiguous
3388 * logical block extents, call the multi-block allocator, and 3310 * logical block extents, call the multi-block allocator, and
3389 * then update the buffer heads with the block allocations. 3311 * then update the buffer heads with the block allocations.
@@ -3460,6 +3382,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
3460 3382
3461static int ext4_readpage(struct file *file, struct page *page) 3383static int ext4_readpage(struct file *file, struct page *page)
3462{ 3384{
3385 trace_ext4_readpage(page);
3463 return mpage_readpage(page, ext4_get_block); 3386 return mpage_readpage(page, ext4_get_block);
3464} 3387}
3465 3388
@@ -3494,6 +3417,8 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
3494{ 3417{
3495 journal_t *journal = EXT4_JOURNAL(page->mapping->host); 3418 journal_t *journal = EXT4_JOURNAL(page->mapping->host);
3496 3419
3420 trace_ext4_invalidatepage(page, offset);
3421
3497 /* 3422 /*
3498 * free any io_end structure allocated for buffers to be discarded 3423 * free any io_end structure allocated for buffers to be discarded
3499 */ 3424 */
@@ -3515,6 +3440,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
3515{ 3440{
3516 journal_t *journal = EXT4_JOURNAL(page->mapping->host); 3441 journal_t *journal = EXT4_JOURNAL(page->mapping->host);
3517 3442
3443 trace_ext4_releasepage(page);
3444
3518 WARN_ON(PageChecked(page)); 3445 WARN_ON(PageChecked(page));
3519 if (!page_has_buffers(page)) 3446 if (!page_has_buffers(page))
3520 return 0; 3447 return 0;
@@ -3768,7 +3695,7 @@ retry:
3768 * 3695 *
 3769 * The unwritten extents will be converted to written when DIO is completed. 3696 * The unwritten extents will be converted to written when DIO is completed.
 3770 * For async direct IO, since the IO may still be pending on return, we 3697 * For async direct IO, since the IO may still be pending on return, we
3771 * set up an end_io call back function, which will do the convertion 3698 * set up an end_io call back function, which will do the conversion
3772 * when async direct IO completed. 3699 * when async direct IO completed.
3773 * 3700 *
3774 * If the O_DIRECT write will extend the file then add this inode to the 3701 * If the O_DIRECT write will extend the file then add this inode to the
@@ -3791,7 +3718,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3791 * We could direct write to holes and fallocate. 3718 * We could direct write to holes and fallocate.
3792 * 3719 *
3793 * Allocated blocks to fill the hole are marked as uninitialized 3720 * Allocated blocks to fill the hole are marked as uninitialized
3794 * to prevent paralel buffered read to expose the stale data 3721 * to prevent parallel buffered read to expose the stale data
3795 * before DIO complete the data IO. 3722 * before DIO complete the data IO.
3796 * 3723 *
3797 * As to previously fallocated extents, ext4 get_block 3724 * As to previously fallocated extents, ext4 get_block
@@ -3852,7 +3779,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3852 int err; 3779 int err;
3853 /* 3780 /*
3854 * for non AIO case, since the IO is already 3781 * for non AIO case, since the IO is already
3855 * completed, we could do the convertion right here 3782 * completed, we could do the conversion right here
3856 */ 3783 */
3857 err = ext4_convert_unwritten_extents(inode, 3784 err = ext4_convert_unwritten_extents(inode,
3858 offset, ret); 3785 offset, ret);
@@ -3873,11 +3800,16 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
3873{ 3800{
3874 struct file *file = iocb->ki_filp; 3801 struct file *file = iocb->ki_filp;
3875 struct inode *inode = file->f_mapping->host; 3802 struct inode *inode = file->f_mapping->host;
3803 ssize_t ret;
3876 3804
3805 trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
3877 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 3806 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3878 return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); 3807 ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
3879 3808 else
3880 return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); 3809 ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
3810 trace_ext4_direct_IO_exit(inode, offset,
3811 iov_length(iov, nr_segs), rw, ret);
3812 return ret;
3881} 3813}
3882 3814
3883/* 3815/*
@@ -3903,7 +3835,6 @@ static const struct address_space_operations ext4_ordered_aops = {
3903 .readpage = ext4_readpage, 3835 .readpage = ext4_readpage,
3904 .readpages = ext4_readpages, 3836 .readpages = ext4_readpages,
3905 .writepage = ext4_writepage, 3837 .writepage = ext4_writepage,
3906 .sync_page = block_sync_page,
3907 .write_begin = ext4_write_begin, 3838 .write_begin = ext4_write_begin,
3908 .write_end = ext4_ordered_write_end, 3839 .write_end = ext4_ordered_write_end,
3909 .bmap = ext4_bmap, 3840 .bmap = ext4_bmap,
@@ -3919,7 +3850,6 @@ static const struct address_space_operations ext4_writeback_aops = {
3919 .readpage = ext4_readpage, 3850 .readpage = ext4_readpage,
3920 .readpages = ext4_readpages, 3851 .readpages = ext4_readpages,
3921 .writepage = ext4_writepage, 3852 .writepage = ext4_writepage,
3922 .sync_page = block_sync_page,
3923 .write_begin = ext4_write_begin, 3853 .write_begin = ext4_write_begin,
3924 .write_end = ext4_writeback_write_end, 3854 .write_end = ext4_writeback_write_end,
3925 .bmap = ext4_bmap, 3855 .bmap = ext4_bmap,
@@ -3935,7 +3865,6 @@ static const struct address_space_operations ext4_journalled_aops = {
3935 .readpage = ext4_readpage, 3865 .readpage = ext4_readpage,
3936 .readpages = ext4_readpages, 3866 .readpages = ext4_readpages,
3937 .writepage = ext4_writepage, 3867 .writepage = ext4_writepage,
3938 .sync_page = block_sync_page,
3939 .write_begin = ext4_write_begin, 3868 .write_begin = ext4_write_begin,
3940 .write_end = ext4_journalled_write_end, 3869 .write_end = ext4_journalled_write_end,
3941 .set_page_dirty = ext4_journalled_set_page_dirty, 3870 .set_page_dirty = ext4_journalled_set_page_dirty,
@@ -3951,7 +3880,6 @@ static const struct address_space_operations ext4_da_aops = {
3951 .readpages = ext4_readpages, 3880 .readpages = ext4_readpages,
3952 .writepage = ext4_writepage, 3881 .writepage = ext4_writepage,
3953 .writepages = ext4_da_writepages, 3882 .writepages = ext4_da_writepages,
3954 .sync_page = block_sync_page,
3955 .write_begin = ext4_da_write_begin, 3883 .write_begin = ext4_da_write_begin,
3956 .write_end = ext4_da_write_end, 3884 .write_end = ext4_da_write_end,
3957 .bmap = ext4_bmap, 3885 .bmap = ext4_bmap,
@@ -4098,7 +4026,7 @@ static inline int all_zeroes(__le32 *p, __le32 *q)
4098 * 4026 *
4099 * When we do truncate() we may have to clean the ends of several 4027 * When we do truncate() we may have to clean the ends of several
4100 * indirect blocks but leave the blocks themselves alive. Block is 4028 * indirect blocks but leave the blocks themselves alive. Block is
4101 * partially truncated if some data below the new i_size is refered 4029 * partially truncated if some data below the new i_size is referred
4102 * from it (and it is on the path to the first completely truncated 4030 * from it (and it is on the path to the first completely truncated
4103 * data block, indeed). We have to free the top of that path along 4031 * data block, indeed). We have to free the top of that path along
4104 * with everything to the right of the path. Since no allocation 4032 * with everything to the right of the path. Since no allocation
@@ -4177,6 +4105,9 @@ no_top:
4177 * 4105 *
4178 * We release `count' blocks on disk, but (last - first) may be greater 4106 * We release `count' blocks on disk, but (last - first) may be greater
4179 * than `count' because there can be holes in there. 4107 * than `count' because there can be holes in there.
4108 *
4109 * Return 0 on success, 1 on invalid block range
4110 * and < 0 on fatal error.
4180 */ 4111 */
4181static int ext4_clear_blocks(handle_t *handle, struct inode *inode, 4112static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
4182 struct buffer_head *bh, 4113 struct buffer_head *bh,
@@ -4203,33 +4134,32 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
4203 if (bh) { 4134 if (bh) {
4204 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 4135 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
4205 err = ext4_handle_dirty_metadata(handle, inode, bh); 4136 err = ext4_handle_dirty_metadata(handle, inode, bh);
4206 if (unlikely(err)) { 4137 if (unlikely(err))
4207 ext4_std_error(inode->i_sb, err); 4138 goto out_err;
4208 return 1;
4209 }
4210 } 4139 }
4211 err = ext4_mark_inode_dirty(handle, inode); 4140 err = ext4_mark_inode_dirty(handle, inode);
4212 if (unlikely(err)) { 4141 if (unlikely(err))
4213 ext4_std_error(inode->i_sb, err); 4142 goto out_err;
4214 return 1;
4215 }
4216 err = ext4_truncate_restart_trans(handle, inode, 4143 err = ext4_truncate_restart_trans(handle, inode,
4217 blocks_for_truncate(inode)); 4144 blocks_for_truncate(inode));
4218 if (unlikely(err)) { 4145 if (unlikely(err))
4219 ext4_std_error(inode->i_sb, err); 4146 goto out_err;
4220 return 1;
4221 }
4222 if (bh) { 4147 if (bh) {
4223 BUFFER_TRACE(bh, "retaking write access"); 4148 BUFFER_TRACE(bh, "retaking write access");
4224 ext4_journal_get_write_access(handle, bh); 4149 err = ext4_journal_get_write_access(handle, bh);
4150 if (unlikely(err))
4151 goto out_err;
4225 } 4152 }
4226 } 4153 }
4227 4154
4228 for (p = first; p < last; p++) 4155 for (p = first; p < last; p++)
4229 *p = 0; 4156 *p = 0;
4230 4157
4231 ext4_free_blocks(handle, inode, 0, block_to_free, count, flags); 4158 ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags);
4232 return 0; 4159 return 0;
4160out_err:
4161 ext4_std_error(inode->i_sb, err);
4162 return err;
4233} 4163}
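
The function now distinguishes three outcomes: 0 on success, 1 on an invalid block range, and a negative value on a fatal journal error (after calling ext4_std_error()). A fragment showing how the ext4_free_data() hunks below consume that contract; this mirrors the patch rather than adding to it:

	/* inside the scan loop: any nonzero result stops the accumulation */
	err = ext4_clear_blocks(handle, inode, this_bh,
				block_to_free, count, block_to_free_p, p);
	if (err)
		break;

	/* after the loop: only a fatal error aborts the caller outright */
	if (err < 0)
		return;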
4234 4164
4235/** 4165/**
@@ -4240,7 +4170,7 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
4240 * @first: array of block numbers 4170 * @first: array of block numbers
4241 * @last: points immediately past the end of array 4171 * @last: points immediately past the end of array
4242 * 4172 *
4243 * We are freeing all blocks refered from that array (numbers are stored as 4173 * We are freeing all blocks referred from that array (numbers are stored as
4244 * little-endian 32-bit) and updating @inode->i_blocks appropriately. 4174 * little-endian 32-bit) and updating @inode->i_blocks appropriately.
4245 * 4175 *
4246 * We accumulate contiguous runs of blocks to free. Conveniently, if these 4176 * We accumulate contiguous runs of blocks to free. Conveniently, if these
@@ -4263,7 +4193,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
4263 ext4_fsblk_t nr; /* Current block # */ 4193 ext4_fsblk_t nr; /* Current block # */
4264 __le32 *p; /* Pointer into inode/ind 4194 __le32 *p; /* Pointer into inode/ind
4265 for current block */ 4195 for current block */
4266 int err; 4196 int err = 0;
4267 4197
4268 if (this_bh) { /* For indirect block */ 4198 if (this_bh) { /* For indirect block */
4269 BUFFER_TRACE(this_bh, "get_write_access"); 4199 BUFFER_TRACE(this_bh, "get_write_access");
@@ -4285,9 +4215,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
4285 } else if (nr == block_to_free + count) { 4215 } else if (nr == block_to_free + count) {
4286 count++; 4216 count++;
4287 } else { 4217 } else {
4288 if (ext4_clear_blocks(handle, inode, this_bh, 4218 err = ext4_clear_blocks(handle, inode, this_bh,
4289 block_to_free, count, 4219 block_to_free, count,
4290 block_to_free_p, p)) 4220 block_to_free_p, p);
4221 if (err)
4291 break; 4222 break;
4292 block_to_free = nr; 4223 block_to_free = nr;
4293 block_to_free_p = p; 4224 block_to_free_p = p;
@@ -4296,9 +4227,12 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
4296 } 4227 }
4297 } 4228 }
4298 4229
4299 if (count > 0) 4230 if (!err && count > 0)
4300 ext4_clear_blocks(handle, inode, this_bh, block_to_free, 4231 err = ext4_clear_blocks(handle, inode, this_bh, block_to_free,
4301 count, block_to_free_p, p); 4232 count, block_to_free_p, p);
4233 if (err < 0)
4234 /* fatal error */
4235 return;
4302 4236
4303 if (this_bh) { 4237 if (this_bh) {
4304 BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata"); 4238 BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata");
@@ -4328,7 +4262,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
4328 * @last: pointer immediately past the end of array 4262 * @last: pointer immediately past the end of array
4329 * @depth: depth of the branches to free 4263 * @depth: depth of the branches to free
4330 * 4264 *
4331 * We are freeing all blocks refered from these branches (numbers are 4265 * We are freeing all blocks referred from these branches (numbers are
4332 * stored as little-endian 32-bit) and updating @inode->i_blocks 4266 * stored as little-endian 32-bit) and updating @inode->i_blocks
4333 * appropriately. 4267 * appropriately.
4334 */ 4268 */
@@ -4416,7 +4350,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
4416 * transaction where the data blocks are 4350 * transaction where the data blocks are
4417 * actually freed. 4351 * actually freed.
4418 */ 4352 */
4419 ext4_free_blocks(handle, inode, 0, nr, 1, 4353 ext4_free_blocks(handle, inode, NULL, nr, 1,
4420 EXT4_FREE_BLOCKS_METADATA| 4354 EXT4_FREE_BLOCKS_METADATA|
4421 EXT4_FREE_BLOCKS_FORGET); 4355 EXT4_FREE_BLOCKS_FORGET);
4422 4356
@@ -4496,10 +4430,12 @@ void ext4_truncate(struct inode *inode)
4496 Indirect chain[4]; 4430 Indirect chain[4];
4497 Indirect *partial; 4431 Indirect *partial;
4498 __le32 nr = 0; 4432 __le32 nr = 0;
4499 int n; 4433 int n = 0;
4500 ext4_lblk_t last_block; 4434 ext4_lblk_t last_block, max_block;
4501 unsigned blocksize = inode->i_sb->s_blocksize; 4435 unsigned blocksize = inode->i_sb->s_blocksize;
4502 4436
4437 trace_ext4_truncate_enter(inode);
4438
4503 if (!ext4_can_truncate(inode)) 4439 if (!ext4_can_truncate(inode))
4504 return; 4440 return;
4505 4441
@@ -4510,6 +4446,7 @@ void ext4_truncate(struct inode *inode)
4510 4446
4511 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 4447 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
4512 ext4_ext_truncate(inode); 4448 ext4_ext_truncate(inode);
4449 trace_ext4_truncate_exit(inode);
4513 return; 4450 return;
4514 } 4451 }
4515 4452
@@ -4519,14 +4456,18 @@ void ext4_truncate(struct inode *inode)
4519 4456
4520 last_block = (inode->i_size + blocksize-1) 4457 last_block = (inode->i_size + blocksize-1)
4521 >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); 4458 >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
4459 max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
4460 >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
4522 4461
4523 if (inode->i_size & (blocksize - 1)) 4462 if (inode->i_size & (blocksize - 1))
4524 if (ext4_block_truncate_page(handle, mapping, inode->i_size)) 4463 if (ext4_block_truncate_page(handle, mapping, inode->i_size))
4525 goto out_stop; 4464 goto out_stop;
4526 4465
4527 n = ext4_block_to_path(inode, last_block, offsets, NULL); 4466 if (last_block != max_block) {
4528 if (n == 0) 4467 n = ext4_block_to_path(inode, last_block, offsets, NULL);
4529 goto out_stop; /* error */ 4468 if (n == 0)
4469 goto out_stop; /* error */
4470 }
4530 4471
4531 /* 4472 /*
4532 * OK. This truncate is going to happen. We add the inode to the 4473 * OK. This truncate is going to happen. We add the inode to the
@@ -4557,7 +4498,13 @@ void ext4_truncate(struct inode *inode)
4557 */ 4498 */
4558 ei->i_disksize = inode->i_size; 4499 ei->i_disksize = inode->i_size;
4559 4500
4560 if (n == 1) { /* direct blocks */ 4501 if (last_block == max_block) {
4502 /*
4503 * It is unnecessary to free any data blocks if last_block is
4504 * equal to the indirect block limit.
4505 */
4506 goto out_unlock;
4507 } else if (n == 1) { /* direct blocks */
4561 ext4_free_data(handle, inode, NULL, i_data+offsets[0], 4508 ext4_free_data(handle, inode, NULL, i_data+offsets[0],
4562 i_data + EXT4_NDIR_BLOCKS); 4509 i_data + EXT4_NDIR_BLOCKS);
4563 goto do_indirects; 4510 goto do_indirects;
@@ -4617,6 +4564,7 @@ do_indirects:
4617 ; 4564 ;
4618 } 4565 }
4619 4566
4567out_unlock:
4620 up_write(&ei->i_data_sem); 4568 up_write(&ei->i_data_sem);
4621 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 4569 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
4622 ext4_mark_inode_dirty(handle, inode); 4570 ext4_mark_inode_dirty(handle, inode);
@@ -4639,6 +4587,7 @@ out_stop:
4639 ext4_orphan_del(handle, inode); 4587 ext4_orphan_del(handle, inode);
4640 4588
4641 ext4_journal_stop(handle); 4589 ext4_journal_stop(handle);
4590 trace_ext4_truncate_exit(inode);
4642} 4591}
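
last_block and max_block are both produced by the same round-up-to-blocks shift. A worked example of that arithmetic, assuming a hypothetical 4 KiB block size (EXT4_BLOCK_SIZE_BITS == 12):

	#include <stdio.h>

	int main(void)
	{
		unsigned long long bits = 12, blocksize = 1ULL << bits;
		unsigned long long i_size = 10000;	/* hypothetical size */
		unsigned long long last_block;

		/* same formula as last_block/max_block in the hunk */
		last_block = (i_size + blocksize - 1) >> bits;
		printf("last_block = %llu\n", last_block);	/* prints 3 */
		return 0;
	}

When i_size already sits at s_bitmap_maxbytes, the two counts come out equal and the new code skips the data-block walk entirely, which is the point of the max_block check.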
4643 4592
4644/* 4593/*
@@ -4770,6 +4719,7 @@ make_io:
4770 * has in-inode xattrs, or we don't have this inode in memory. 4719 * has in-inode xattrs, or we don't have this inode in memory.
4771 * Read the block from disk. 4720 * Read the block from disk.
4772 */ 4721 */
4722 trace_ext4_load_inode(inode);
4773 get_bh(bh); 4723 get_bh(bh);
4774 bh->b_end_io = end_buffer_read_sync; 4724 bh->b_end_io = end_buffer_read_sync;
4775 submit_bh(READ_META, bh); 4725 submit_bh(READ_META, bh);
@@ -4875,7 +4825,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4875 return inode; 4825 return inode;
4876 4826
4877 ei = EXT4_I(inode); 4827 ei = EXT4_I(inode);
4878 iloc.bh = 0; 4828 iloc.bh = NULL;
4879 4829
4880 ret = __ext4_get_inode_loc(inode, &iloc, 0); 4830 ret = __ext4_get_inode_loc(inode, &iloc, 0);
4881 if (ret < 0) 4831 if (ret < 0)
@@ -5460,13 +5410,12 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
5460 /* if nrblocks are contiguous */ 5410 /* if nrblocks are contiguous */
5461 if (chunk) { 5411 if (chunk) {
5462 /* 5412 /*
5463 * With N contiguous data blocks, it need at most 5413 * With N contiguous data blocks, we need at most
5464 * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks 5414 * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks,
5465 * 2 dindirect blocks 5415 * 2 dindirect blocks, and 1 tindirect block
5466 * 1 tindirect block
5467 */ 5416 */
5468 indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb); 5417 return DIV_ROUND_UP(nrblocks,
5469 return indirects + 3; 5418 EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4;
5470 } 5419 }
5471 /* 5420 /*
 5472 * if nrblocks are not contiguous, worst case, each block touch 5421 * if nrblocks are not contiguous, worst case, each block touch
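
A worked example of the corrected credit bound, assuming a 4 KiB block size so that one mapping block holds 4096/4 == 1024 pointers (EXT4_ADDR_PER_BLOCK == 1024); the numbers are illustrative:

	/*
	 * nrblocks == 2048 contiguous blocks:
	 *
	 *	DIV_ROUND_UP(2048, 1024) + 4 == 2 + 4 == 6 credits
	 *
	 * The "+ 4" covers one extra indirect block for a misaligned
	 * run plus 2 dindirect and 1 tindirect, which the old
	 * "indirects + 3" undercounted.
	 */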
@@ -5540,7 +5489,7 @@ static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
5540} 5489}
5541 5490
5542/* 5491/*
5543 * Calulate the total number of credits to reserve to fit 5492 * Calculate the total number of credits to reserve to fit
5544 * the modification of a single pages into a single transaction, 5493 * the modification of a single pages into a single transaction,
5545 * which may include multiple chunks of block allocations. 5494 * which may include multiple chunks of block allocations.
5546 * 5495 *
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index eb3bc2fe647e..808c554e773f 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -38,7 +38,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
38 unsigned int oldflags; 38 unsigned int oldflags;
39 unsigned int jflag; 39 unsigned int jflag;
40 40
41 if (!is_owner_or_cap(inode)) 41 if (!inode_owner_or_capable(inode))
42 return -EACCES; 42 return -EACCES;
43 43
44 if (get_user(flags, (int __user *) arg)) 44 if (get_user(flags, (int __user *) arg))
@@ -146,7 +146,7 @@ flags_out:
146 __u32 generation; 146 __u32 generation;
147 int err; 147 int err;
148 148
149 if (!is_owner_or_cap(inode)) 149 if (!inode_owner_or_capable(inode))
150 return -EPERM; 150 return -EPERM;
151 151
152 err = mnt_want_write(filp->f_path.mnt); 152 err = mnt_want_write(filp->f_path.mnt);
@@ -298,7 +298,7 @@ mext_out:
298 case EXT4_IOC_MIGRATE: 298 case EXT4_IOC_MIGRATE:
299 { 299 {
300 int err; 300 int err;
301 if (!is_owner_or_cap(inode)) 301 if (!inode_owner_or_capable(inode))
302 return -EACCES; 302 return -EACCES;
303 303
304 err = mnt_want_write(filp->f_path.mnt); 304 err = mnt_want_write(filp->f_path.mnt);
@@ -320,7 +320,7 @@ mext_out:
320 case EXT4_IOC_ALLOC_DA_BLKS: 320 case EXT4_IOC_ALLOC_DA_BLKS:
321 { 321 {
322 int err; 322 int err;
323 if (!is_owner_or_cap(inode)) 323 if (!inode_owner_or_capable(inode))
324 return -EACCES; 324 return -EACCES;
325 325
326 err = mnt_want_write(filp->f_path.mnt); 326 err = mnt_want_write(filp->f_path.mnt);
@@ -334,16 +334,22 @@ mext_out:
334 case FITRIM: 334 case FITRIM:
335 { 335 {
336 struct super_block *sb = inode->i_sb; 336 struct super_block *sb = inode->i_sb;
337 struct request_queue *q = bdev_get_queue(sb->s_bdev);
337 struct fstrim_range range; 338 struct fstrim_range range;
338 int ret = 0; 339 int ret = 0;
339 340
340 if (!capable(CAP_SYS_ADMIN)) 341 if (!capable(CAP_SYS_ADMIN))
341 return -EPERM; 342 return -EPERM;
342 343
344 if (!blk_queue_discard(q))
345 return -EOPNOTSUPP;
346
343 if (copy_from_user(&range, (struct fstrim_range *)arg, 347 if (copy_from_user(&range, (struct fstrim_range *)arg,
344 sizeof(range))) 348 sizeof(range)))
345 return -EFAULT; 349 return -EFAULT;
346 350
351 range.minlen = max((unsigned int)range.minlen,
352 q->limits.discard_granularity);
347 ret = ext4_trim_fs(sb, &range); 353 ret = ext4_trim_fs(sb, &range);
348 if (ret < 0) 354 if (ret < 0)
349 return ret; 355 return ret;
@@ -421,6 +427,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
421 return err; 427 return err;
422 } 428 }
423 case EXT4_IOC_MOVE_EXT: 429 case EXT4_IOC_MOVE_EXT:
430 case FITRIM:
424 break; 431 break;
425 default: 432 default:
426 return -ENOIOCTLCMD; 433 return -ENOIOCTLCMD;
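
For reference, the new FITRIM path is driven from userspace like this; a hedged sketch assuming the fstrim_range definitions exported through <linux/fs.h> (any file descriptor on the filesystem works, and the kernel raises minlen to the device's discard granularity as the hunk above shows):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/fs.h>		/* FITRIM, struct fstrim_range */

	int main(int argc, char **argv)
	{
		struct fstrim_range range = {
			.start	= 0,
			.len	= ~0ULL,	/* whole filesystem */
			.minlen	= 0,
		};
		int fd = open(argc > 1 ? argv[1] : "/", O_RDONLY);

		if (fd < 0 || ioctl(fd, FITRIM, &range) < 0) {
			perror("FITRIM");	/* EOPNOTSUPP without discard */
			return 1;
		}
		printf("trimmed %llu bytes\n", (unsigned long long)range.len);
		return 0;
	}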
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index d1fe09aea73d..d8a16eecf1d5 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -92,7 +92,7 @@
92 * between CPUs. It is possible to get scheduled at this point. 92 * between CPUs. It is possible to get scheduled at this point.
93 * 93 *
94 * The locality group prealloc space is used looking at whether we have 94 * The locality group prealloc space is used looking at whether we have
95 * enough free space (pa_free) withing the prealloc space. 95 * enough free space (pa_free) within the prealloc space.
96 * 96 *
 97 * If we can't allocate blocks via inode prealloc and/or locality group 97 * If we can't allocate blocks via inode prealloc and/or locality group
98 * prealloc then we look at the buddy cache. The buddy cache is represented 98 * prealloc then we look at the buddy cache. The buddy cache is represented
@@ -432,9 +432,10 @@ static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
432 } 432 }
433 433
434 /* at order 0 we see each particular block */ 434 /* at order 0 we see each particular block */
435 *max = 1 << (e4b->bd_blkbits + 3); 435 if (order == 0) {
436 if (order == 0) 436 *max = 1 << (e4b->bd_blkbits + 3);
437 return EXT4_MB_BITMAP(e4b); 437 return EXT4_MB_BITMAP(e4b);
438 }
438 439
439 bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order]; 440 bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
440 *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order]; 441 *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];
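
An illustration of the order-0 bound above, assuming 4 KiB blocks (bd_blkbits == 12):

	/*
	 *	*max = 1 << (12 + 3) == 32768 bits
	 *
	 * one bit per block in the group. The fix confines this value
	 * to order 0; for higher orders *max now keeps the correct
	 * s_mb_maxs[order] bound instead of being clobbered.
	 */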
@@ -616,7 +617,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
616 MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments); 617 MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);
617 618
618 grp = ext4_get_group_info(sb, e4b->bd_group); 619 grp = ext4_get_group_info(sb, e4b->bd_group);
619 buddy = mb_find_buddy(e4b, 0, &max);
620 list_for_each(cur, &grp->bb_prealloc_list) { 620 list_for_each(cur, &grp->bb_prealloc_list) {
621 ext4_group_t groupnr; 621 ext4_group_t groupnr;
622 struct ext4_prealloc_space *pa; 622 struct ext4_prealloc_space *pa;
@@ -635,7 +635,12 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
635#define mb_check_buddy(e4b) 635#define mb_check_buddy(e4b)
636#endif 636#endif
637 637
638/* FIXME!! need more doc */ 638/*
 639 * Divide the blocks starting at @first with length @len into
 640 * smaller chunks, each a power of 2 in size.
 641 * Clear the bits in the bitmap that the chunks' blocks cover,
 642 * then increase bb_counters[] for each corresponding chunk size.
643 */
639static void ext4_mb_mark_free_simple(struct super_block *sb, 644static void ext4_mb_mark_free_simple(struct super_block *sb,
640 void *buddy, ext4_grpblk_t first, ext4_grpblk_t len, 645 void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
641 struct ext4_group_info *grp) 646 struct ext4_group_info *grp)
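
A self-contained sketch of the decomposition the new comment describes: split [first, first + len) into the largest power-of-2 chunks whose start is aligned to their size, which is what feeds bb_counters[order]. The helper name is hypothetical; userspace C for illustration:

	#include <stdio.h>

	static void mark_free_simple(unsigned int first, unsigned int len)
	{
		while (len) {
			unsigned int order = 0;

			/* grow while a doubled chunk stays aligned and fits */
			while (!(first & ((2u << order) - 1)) &&
			       (2u << order) <= len)
				order++;
			printf("chunk at %u, order %u (%u blocks)\n",
			       first, order, 1u << order);
			first += 1u << order;
			len -= 1u << order;
		}
	}

	int main(void)
	{
		mark_free_simple(5, 11);  /* chunks: (5,1), (6,2), (8,8) */
		return 0;
	}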
@@ -2381,7 +2386,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
2381 /* An 8TB filesystem with 64-bit pointers requires a 4096 byte 2386 /* An 8TB filesystem with 64-bit pointers requires a 4096 byte
2382 * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. 2387 * kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
2383 * So a two level scheme suffices for now. */ 2388 * So a two level scheme suffices for now. */
2384 sbi->s_group_info = kmalloc(array_size, GFP_KERNEL); 2389 sbi->s_group_info = kzalloc(array_size, GFP_KERNEL);
2385 if (sbi->s_group_info == NULL) { 2390 if (sbi->s_group_info == NULL) {
2386 printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n"); 2391 printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n");
2387 return -ENOMEM; 2392 return -ENOMEM;
@@ -3208,7 +3213,7 @@ ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
3208 cur_distance = abs(goal_block - cpa->pa_pstart); 3213 cur_distance = abs(goal_block - cpa->pa_pstart);
3209 new_distance = abs(goal_block - pa->pa_pstart); 3214 new_distance = abs(goal_block - pa->pa_pstart);
3210 3215
3211 if (cur_distance < new_distance) 3216 if (cur_distance <= new_distance)
3212 return cpa; 3217 return cpa;
3213 3218
3214 /* drop the previous reference */ 3219 /* drop the previous reference */
@@ -3907,7 +3912,8 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
3907 struct super_block *sb = ac->ac_sb; 3912 struct super_block *sb = ac->ac_sb;
3908 ext4_group_t ngroups, i; 3913 ext4_group_t ngroups, i;
3909 3914
3910 if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED) 3915 if (!mb_enable_debug ||
3916 (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
3911 return; 3917 return;
3912 3918
3913 printk(KERN_ERR "EXT4-fs: Can't allocate:" 3919 printk(KERN_ERR "EXT4-fs: Can't allocate:"
@@ -4753,7 +4759,8 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count,
4753 * bitmap. Then issue a TRIM command on this extent and free the extent in 4759 * bitmap. Then issue a TRIM command on this extent and free the extent in
4754 * the group buddy bitmap. This is done until whole group is scanned. 4760 * the group buddy bitmap. This is done until whole group is scanned.
4755 */ 4761 */
4756ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b, 4762static ext4_grpblk_t
4763ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
4757 ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks) 4764 ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks)
4758{ 4765{
4759 void *bitmap; 4766 void *bitmap;
@@ -4863,10 +4870,15 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4863 break; 4870 break;
4864 } 4871 }
4865 4872
4866 if (len >= EXT4_BLOCKS_PER_GROUP(sb)) 4873 /*
4867 len -= (EXT4_BLOCKS_PER_GROUP(sb) - first_block); 4874 * For all the groups except the last one, last block will
4868 else 4875 * always be EXT4_BLOCKS_PER_GROUP(sb), so we only need to
4876 * change it for the last group in which case start +
4877 * len < EXT4_BLOCKS_PER_GROUP(sb).
4878 */
4879 if (first_block + len < EXT4_BLOCKS_PER_GROUP(sb))
4869 last_block = first_block + len; 4880 last_block = first_block + len;
4881 len -= last_block - first_block;
4870 4882
4871 if (e4b.bd_info->bb_free >= minlen) { 4883 if (e4b.bd_info->bb_free >= minlen) {
4872 cnt = ext4_trim_all_free(sb, &e4b, first_block, 4884 cnt = ext4_trim_all_free(sb, &e4b, first_block,
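
A worked pass over the per-group clamping above, with illustrative numbers (EXT4_BLOCKS_PER_GROUP == 32768, request start == 1000, len == 100000):

	/*
	 *	group 0: trims [1000, 32768), len -= 31768 -> 68232 left
	 *	group 1: trims [0, 32768),    len -= 32768 -> 35464 left
	 *	group 2: trims [0, 32768),    len -= 32768 ->  2696 left
	 *	group 3: first_block + len == 2696 < 32768, so last_block
	 *	         becomes 2696, len drops to 0, and the walk ends
	 */

Only the final group takes the "first_block + len < EXT4_BLOCKS_PER_GROUP(sb)" branch, exactly as the new comment states.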
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index b619322c76f0..22bd4d7f289b 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -169,7 +169,7 @@ struct ext4_allocation_context {
169 /* original request */ 169 /* original request */
170 struct ext4_free_extent ac_o_ex; 170 struct ext4_free_extent ac_o_ex;
171 171
172 /* goal request (after normalization) */ 172 /* goal request (normalized ac_o_ex) */
173 struct ext4_free_extent ac_g_ex; 173 struct ext4_free_extent ac_g_ex;
174 174
175 /* the best found extent */ 175 /* the best found extent */
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index b0a126f23c20..92816b4e0f16 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -263,7 +263,7 @@ static int free_dind_blocks(handle_t *handle,
 	for (i = 0; i < max_entries; i++) {
 		if (tmp_idata[i]) {
 			extend_credit_for_blkdel(handle, inode);
-			ext4_free_blocks(handle, inode, 0,
+			ext4_free_blocks(handle, inode, NULL,
 					 le32_to_cpu(tmp_idata[i]), 1,
 					 EXT4_FREE_BLOCKS_METADATA |
 					 EXT4_FREE_BLOCKS_FORGET);
@@ -271,7 +271,7 @@ static int free_dind_blocks(handle_t *handle,
 	}
 	put_bh(bh);
 	extend_credit_for_blkdel(handle, inode);
-	ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
+	ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
 			 EXT4_FREE_BLOCKS_METADATA |
 			 EXT4_FREE_BLOCKS_FORGET);
 	return 0;
@@ -302,7 +302,7 @@ static int free_tind_blocks(handle_t *handle,
 	}
 	put_bh(bh);
 	extend_credit_for_blkdel(handle, inode);
-	ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
+	ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
 			 EXT4_FREE_BLOCKS_METADATA |
 			 EXT4_FREE_BLOCKS_FORGET);
 	return 0;
@@ -315,7 +315,7 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
 	/* ei->i_data[EXT4_IND_BLOCK] */
 	if (i_data[0]) {
 		extend_credit_for_blkdel(handle, inode);
-		ext4_free_blocks(handle, inode, 0,
+		ext4_free_blocks(handle, inode, NULL,
 				 le32_to_cpu(i_data[0]), 1,
 				 EXT4_FREE_BLOCKS_METADATA |
 				 EXT4_FREE_BLOCKS_FORGET);
@@ -428,7 +428,7 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
 	}
 	put_bh(bh);
 	extend_credit_for_blkdel(handle, inode);
-	ext4_free_blocks(handle, inode, 0, block, 1,
+	ext4_free_blocks(handle, inode, NULL, block, 1,
 			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
 	return retval;
 }
@@ -517,7 +517,7 @@ int ext4_ext_migrate(struct inode *inode)
 	 * start with one credit accounted for
 	 * superblock modification.
 	 *
-	 * For the tmp_inode we already have commited the
+	 * For the tmp_inode we already have committed the
 	 * trascation that created the inode. Later as and
 	 * when we add extents we extent the journal
 	 */
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index e781b7ea5630..67fd0b025858 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -40,6 +40,7 @@
 #include "xattr.h"
 #include "acl.h"
 
+#include <trace/events/ext4.h>
 /*
  * define how far ahead to read directories while searching them.
  */
@@ -2183,6 +2184,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
 	struct ext4_dir_entry_2 *de;
 	handle_t *handle;
 
+	trace_ext4_unlink_enter(dir, dentry);
 	/* Initialize quotas before so that eventual writes go
 	 * in separate transaction */
 	dquot_initialize(dir);
@@ -2228,6 +2230,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
 end_unlink:
 	ext4_journal_stop(handle);
 	brelse(bh);
+	trace_ext4_unlink_exit(dentry, retval);
 	return retval;
 }
 
@@ -2402,6 +2405,10 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 		if (!new_inode && new_dir != old_dir &&
 		    EXT4_DIR_LINK_MAX(new_dir))
 			goto end_rename;
+		BUFFER_TRACE(dir_bh, "get_write_access");
+		retval = ext4_journal_get_write_access(handle, dir_bh);
+		if (retval)
+			goto end_rename;
 	}
 	if (!new_bh) {
 		retval = ext4_add_entry(handle, new_dentry, old_inode);
@@ -2409,7 +2416,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 			goto end_rename;
 	} else {
 		BUFFER_TRACE(new_bh, "get write access");
-		ext4_journal_get_write_access(handle, new_bh);
+		retval = ext4_journal_get_write_access(handle, new_bh);
+		if (retval)
+			goto end_rename;
 		new_de->inode = cpu_to_le32(old_inode->i_ino);
 		if (EXT4_HAS_INCOMPAT_FEATURE(new_dir->i_sb,
 					      EXT4_FEATURE_INCOMPAT_FILETYPE))
@@ -2470,8 +2479,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 	old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir);
 	ext4_update_dx_flag(old_dir);
 	if (dir_bh) {
-		BUFFER_TRACE(dir_bh, "get_write_access");
-		ext4_journal_get_write_access(handle, dir_bh);
 		PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
 			cpu_to_le32(new_dir->i_ino);
 		BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
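
Both rename hunks enforce the same discipline: ext4_journal_get_write_access() can fail, so its result is checked before the directory block is modified, not ignored after the fact. A minimal sketch of that check-before-modify shape, with stand-in types rather than the jbd2 API:

    #include <stdio.h>

    struct buf { int locked; int data; };

    static int get_write_access(struct buf *b)
    {
            if (b->locked)
                    return -1;      /* propagate instead of scribbling anyway */
            return 0;
    }

    static int update_parent_pointer(struct buf *b, int new_parent)
    {
            int err = get_write_access(b);  /* first: this may fail */
            if (err)
                    return err;             /* bail before touching b->data */
            b->data = new_parent;           /* only now is the edit safe to journal */
            return 0;
    }

    int main(void)
    {
            struct buf b = { 0, 1 };
            printf("%d\n", update_parent_pointer(&b, 2));
            return 0;
    }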
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 955cc309142f..b6dbd056fcb1 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -259,6 +259,11 @@ static void ext4_end_bio(struct bio *bio, int error)
 			     bi_sector >> (inode->i_blkbits - 9));
 	}
 
+	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+		ext4_free_io_end(io_end);
+		return;
+	}
+
 	/* Add the io_end to per-inode completed io list*/
 	spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
 	list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
@@ -279,9 +284,9 @@ void ext4_io_submit(struct ext4_io_submit *io)
 		BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP));
 		bio_put(io->io_bio);
 	}
-	io->io_bio = 0;
+	io->io_bio = NULL;
 	io->io_op = 0;
-	io->io_end = 0;
+	io->io_end = NULL;
 }
 
 static int io_submit_init(struct ext4_io_submit *io,
@@ -310,8 +315,7 @@ static int io_submit_init(struct ext4_io_submit *io,
 	io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
 
 	io->io_bio = bio;
-	io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?
-		     WRITE_SYNC_PLUG : WRITE);
+	io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
 	io->io_next_block = bh->b_blocknr;
 	return 0;
 }
@@ -381,8 +385,6 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
 
 	BUG_ON(!PageLocked(page));
 	BUG_ON(PageWriteback(page));
-	set_page_writeback(page);
-	ClearPageError(page);
 
 	io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
 	if (!io_page) {
@@ -393,6 +395,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
 	io_page->p_page = page;
 	atomic_set(&io_page->p_count, 1);
 	get_page(page);
+	set_page_writeback(page);
+	ClearPageError(page);
 
 	for (bh = head = page_buffers(page), block_start = 0;
 	     bh != head || !block_start;
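
The reordering in ext4_bio_write_page() matters for the error path: the page is flagged for writeback only once every allocation has succeeded, so a failed kmem_cache_alloc() can no longer leave a page marked writeback with no I/O pending to clear it. A simplified model of the rule, with hypothetical names:

    #include <stdbool.h>
    #include <stdlib.h>

    struct page { bool writeback; };

    static int write_page(struct page *pg)
    {
            void *io = malloc(32);          /* any per-IO allocation; may fail */
            if (!io)
                    return -1;              /* page never marked: safe to bail */

            pg->writeback = true;           /* only now, mirrors set_page_writeback() */
            /* ... submit the bio; completion clears the flag ... */
            free(io);
            pg->writeback = false;
            return 0;
    }

    int main(void)
    {
            struct page pg = { false };
            return write_page(&pg);
    }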
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 3ecc6e45d2f9..80bbc9c60c24 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -230,7 +230,7 @@ static int setup_new_group_blocks(struct super_block *sb,
 	}
 
 	/* Zero out all of the reserved backup group descriptor table blocks */
-	ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
+	ext4_debug("clear inode table blocks %#04llx -> %#04lx\n",
 		   block, sbi->s_itb_per_group);
 	err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb,
 			       GFP_NOFS);
@@ -248,7 +248,7 @@ static int setup_new_group_blocks(struct super_block *sb,
 
 	/* Zero out all of the inode table blocks */
 	block = input->inode_table;
-	ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
+	ext4_debug("clear inode table blocks %#04llx -> %#04lx\n",
 		   block, sbi->s_itb_per_group);
 	err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS);
 	if (err)
@@ -499,12 +499,12 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	return err;
 
 exit_inode:
-	/* ext4_journal_release_buffer(handle, iloc.bh); */
+	/* ext4_handle_release_buffer(handle, iloc.bh); */
 	brelse(iloc.bh);
 exit_dindj:
-	/* ext4_journal_release_buffer(handle, dind); */
+	/* ext4_handle_release_buffer(handle, dind); */
 exit_sbh:
-	/* ext4_journal_release_buffer(handle, EXT4_SB(sb)->s_sbh); */
+	/* ext4_handle_release_buffer(handle, EXT4_SB(sb)->s_sbh); */
 exit_dind:
 	brelse(dind);
 exit_bh:
@@ -586,7 +586,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
 		/*
 		 int j;
 		 for (j = 0; j < i; j++)
-			ext4_journal_release_buffer(handle, primary[j]);
+			ext4_handle_release_buffer(handle, primary[j]);
 		 */
 		goto exit_bh;
 	}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 203f9e4a70be..8553dfb310af 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -54,9 +54,9 @@
 
 static struct proc_dir_entry *ext4_proc_root;
 static struct kset *ext4_kset;
-struct ext4_lazy_init *ext4_li_info;
-struct mutex ext4_li_mtx;
-struct ext4_features *ext4_feat;
+static struct ext4_lazy_init *ext4_li_info;
+static struct mutex ext4_li_mtx;
+static struct ext4_features *ext4_feat;
 
 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
 			     unsigned long journal_devnum);
@@ -75,6 +75,7 @@ static void ext4_write_super(struct super_block *sb);
 static int ext4_freeze(struct super_block *sb);
 static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
 		       const char *dev_name, void *data);
+static int ext4_feature_set_ok(struct super_block *sb, int readonly);
 static void ext4_destroy_lazyinit_thread(void);
 static void ext4_unregister_li_request(struct super_block *sb);
 static void ext4_clear_request_list(void);
@@ -241,27 +242,44 @@ static void ext4_put_nojournal(handle_t *handle)
  * journal_end calls result in the superblock being marked dirty, so
  * that sync() will call the filesystem's write_super callback if
  * appropriate.
+ *
+ * To avoid j_barrier hold in userspace when a user calls freeze(),
+ * ext4 prevents a new handle from being started by s_frozen, which
+ * is in an upper layer.
  */
 handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
 {
 	journal_t *journal;
+	handle_t *handle;
 
 	if (sb->s_flags & MS_RDONLY)
 		return ERR_PTR(-EROFS);
 
-	vfs_check_frozen(sb, SB_FREEZE_TRANS);
-	/* Special case here: if the journal has aborted behind our
-	 * backs (eg. EIO in the commit thread), then we still need to
-	 * take the FS itself readonly cleanly. */
 	journal = EXT4_SB(sb)->s_journal;
-	if (journal) {
-		if (is_journal_aborted(journal)) {
-			ext4_abort(sb, "Detected aborted journal");
-			return ERR_PTR(-EROFS);
-		}
-		return jbd2_journal_start(journal, nblocks);
+	handle = ext4_journal_current_handle();
+
+	/*
+	 * If a handle has been started, it should be allowed to
+	 * finish, otherwise deadlock could happen between freeze
+	 * and others(e.g. truncate) due to the restart of the
+	 * journal handle if the filesystem is forzen and active
+	 * handles are not stopped.
+	 */
+	if (!handle)
+		vfs_check_frozen(sb, SB_FREEZE_TRANS);
+
+	if (!journal)
+		return ext4_get_nojournal();
+	/*
+	 * Special case here: if the journal has aborted behind our
+	 * backs (eg. EIO in the commit thread), then we still need to
+	 * take the FS itself readonly cleanly.
+	 */
+	if (is_journal_aborted(journal)) {
+		ext4_abort(sb, "Detected aborted journal");
+		return ERR_PTR(-EROFS);
 	}
-	return ext4_get_nojournal();
+	return jbd2_journal_start(journal, nblocks);
 }
 
 /*
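
The restructured ext4_journal_start_sb() only honors the frozen state when the task does not already hold a handle; otherwise the freezer, which waits for running handles to finish, and the handle owner would wait on each other. A toy model of that rule, single-threaded and with illustrative names rather than the jbd2 API:

    #include <stdbool.h>
    #include <stdio.h>

    static bool fs_frozen;
    static bool have_handle;   /* models ext4_journal_current_handle() != NULL */

    static void journal_start(void)
    {
            if (!have_handle && fs_frozen) {
                    printf("would block until the filesystem is thawed\n");
                    return;
            }
            have_handle = true;        /* nested start: allowed to proceed */
            printf("handle started\n");
    }

    int main(void)
    {
            fs_frozen = true;
            journal_start();           /* no handle yet: must respect the freeze */
            fs_frozen = false;
            journal_start();           /* acquires a handle */
            fs_frozen = true;
            journal_start();           /* already inside a handle: no deadlock */
            return 0;
    }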
@@ -594,7 +612,7 @@ __acquires(bitlock)
 
 	vaf.fmt = fmt;
 	vaf.va = &args;
-	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u",
+	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
 	       sb->s_id, function, line, grp);
 	if (ino)
 		printk(KERN_CONT "inode %lu: ", ino);
@@ -616,7 +634,7 @@ __acquires(bitlock)
  * filesystem will have already been marked read/only and the
  * journal has been aborted.  We return 1 as a hint to callers
  * who might what to use the return value from
- * ext4_grp_locked_error() to distinguish beween the
+ * ext4_grp_locked_error() to distinguish between the
  * ERRORS_CONT and ERRORS_RO case, and perhaps return more
  * aggressively from the ext4 function in question, with a
  * more appropriate error code.
@@ -997,13 +1015,10 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	if (test_opt(sb, OLDALLOC))
 		seq_puts(seq, ",oldalloc");
 #ifdef CONFIG_EXT4_FS_XATTR
-	if (test_opt(sb, XATTR_USER) &&
-	    !(def_mount_opts & EXT4_DEFM_XATTR_USER))
+	if (test_opt(sb, XATTR_USER))
 		seq_puts(seq, ",user_xattr");
-	if (!test_opt(sb, XATTR_USER) &&
-	    (def_mount_opts & EXT4_DEFM_XATTR_USER)) {
+	if (!test_opt(sb, XATTR_USER))
 		seq_puts(seq, ",nouser_xattr");
-	}
 #endif
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
 	if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL))
@@ -1041,8 +1056,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	    !(def_mount_opts & EXT4_DEFM_NODELALLOC))
 		seq_puts(seq, ",nodelalloc");
 
-	if (test_opt(sb, MBLK_IO_SUBMIT))
-		seq_puts(seq, ",mblk_io_submit");
+	if (!test_opt(sb, MBLK_IO_SUBMIT))
+		seq_puts(seq, ",nomblk_io_submit");
 	if (sbi->s_stripe)
 		seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
 	/*
@@ -1451,7 +1466,7 @@ static int parse_options(char *options, struct super_block *sb,
 		 * Initialize args struct so we know whether arg was
 		 * found; some options take optional arguments.
 		 */
-		args[0].to = args[0].from = 0;
+		args[0].to = args[0].from = NULL;
 		token = match_token(p, tokens, args);
 		switch (token) {
 		case Opt_bsd_df:
@@ -1771,7 +1786,7 @@ set_qf_format:
 				return 0;
 			if (option < 0 || option > (1 << 30))
 				return 0;
-			if (!is_power_of_2(option)) {
+			if (option && !is_power_of_2(option)) {
 				ext4_msg(sb, KERN_ERR,
 					 "EXT4-fs: inode_readahead_blks"
 					 " must be a power of 2");
@@ -2120,6 +2135,13 @@ static void ext4_orphan_cleanup(struct super_block *sb,
 		return;
 	}
 
+	/* Check if feature set would not allow a r/w mount */
+	if (!ext4_feature_set_ok(sb, 0)) {
+		ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
+			 "unknown ROCOMPAT features");
+		return;
+	}
+
 	if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
 		if (es->s_last_orphan)
 			jbd_debug(1, "Errors on filesystem, "
@@ -2412,7 +2434,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
 	if (parse_strtoul(buf, 0x40000000, &t))
 		return -EINVAL;
 
-	if (!is_power_of_2(t))
+	if (t && !is_power_of_2(t))
 		return -EINVAL;
 
 	sbi->s_inode_readahead_blks = t;
@@ -2970,6 +2992,12 @@ static int ext4_register_li_request(struct super_block *sb,
 	mutex_unlock(&ext4_li_info->li_list_mtx);
 
 	sbi->s_li_request = elr;
+	/*
+	 * set elr to NULL here since it has been inserted to
+	 * the request_list and the removal and free of it is
+	 * handled by ext4_clear_request_list from now on.
+	 */
+	elr = NULL;
 
 	if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
 		ret = ext4_run_lazyinit_thread();
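
The elr = NULL assignment is an ownership hand-off: once the element is linked into the request list, the list (via ext4_clear_request_list) is responsible for freeing it, and clearing the local pointer keeps any later error path from freeing it a second time. The same shape in plain C, with hypothetical names:

    #include <stdlib.h>

    struct req { struct req *next; };
    static struct req *request_list;

    static int register_request(void)
    {
            struct req *elr = malloc(sizeof(*elr));
            if (!elr)
                    return -1;

            elr->next = request_list;   /* inserted: the list owns it now */
            request_list = elr;
            elr = NULL;                 /* mirrors "elr = NULL" in the patch */

            /* ... later failure handling ... */
            free(elr);                  /* safe: frees NULL, not the listed node */
            return 0;
    }

    int main(void) { return register_request(); }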
@@ -3095,14 +3123,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	}
 	if (def_mount_opts & EXT4_DEFM_UID16)
 		set_opt(sb, NO_UID32);
+	/* xattr user namespace & acls are now defaulted on */
 #ifdef CONFIG_EXT4_FS_XATTR
-	if (def_mount_opts & EXT4_DEFM_XATTR_USER)
-		set_opt(sb, XATTR_USER);
+	set_opt(sb, XATTR_USER);
 #endif
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
-	if (def_mount_opts & EXT4_DEFM_ACL)
-		set_opt(sb, POSIX_ACL);
+	set_opt(sb, POSIX_ACL);
 #endif
+	set_opt(sb, MBLK_IO_SUBMIT);
 	if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
 		set_opt(sb, JOURNAL_DATA);
 	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
@@ -3380,6 +3408,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
 	spin_lock_init(&sbi->s_next_gen_lock);
 
+	init_timer(&sbi->s_err_report);
+	sbi->s_err_report.function = print_daily_error_info;
+	sbi->s_err_report.data = (unsigned long) sb;
+
 	err = percpu_counter_init(&sbi->s_freeblocks_counter,
 			ext4_count_free_blocks(sb));
 	if (!err) {
@@ -3516,7 +3548,7 @@ no_journal:
 	 * concurrency isn't really necessary.  Limit it to 1.
 	 */
 	EXT4_SB(sb)->dio_unwritten_wq =
-		alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM, 1);
+		alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
 	if (!EXT4_SB(sb)->dio_unwritten_wq) {
 		printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
 		goto failed_mount_wq;
@@ -3531,17 +3563,16 @@ no_journal:
 	if (IS_ERR(root)) {
 		ext4_msg(sb, KERN_ERR, "get root inode failed");
 		ret = PTR_ERR(root);
+		root = NULL;
 		goto failed_mount4;
 	}
 	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
-		iput(root);
 		ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
 		goto failed_mount4;
 	}
 	sb->s_root = d_alloc_root(root);
 	if (!sb->s_root) {
 		ext4_msg(sb, KERN_ERR, "get root dentry failed");
-		iput(root);
 		ret = -ENOMEM;
 		goto failed_mount4;
 	}
@@ -3642,9 +3673,6 @@ no_journal:
 		 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
 		 *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
 
-	init_timer(&sbi->s_err_report);
-	sbi->s_err_report.function = print_daily_error_info;
-	sbi->s_err_report.data = (unsigned long) sb;
 	if (es->s_error_count)
 		mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
 
@@ -3657,6 +3685,8 @@ cantfind_ext4:
 	goto failed_mount;
 
 failed_mount4:
+	iput(root);
+	sb->s_root = NULL;
 	ext4_msg(sb, KERN_ERR, "mount failed");
 	destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq);
failed_mount_wq:
@@ -3666,6 +3696,7 @@ failed_mount_wq:
 		sbi->s_journal = NULL;
 	}
 failed_mount3:
+	del_timer(&sbi->s_err_report);
 	if (sbi->s_flex_groups) {
 		if (is_vmalloc_addr(sbi->s_flex_groups))
 			vfree(sbi->s_flex_groups);
@@ -4132,6 +4163,11 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
 /*
  * LVM calls this function before a (read-only) snapshot is created.  This
  * gives us a chance to flush the journal completely and mark the fs clean.
+ *
+ * Note that only this function cannot bring a filesystem to be in a clean
+ * state independently, because ext4 prevents a new handle from being started
+ * by @sb->s_frozen, which stays in an upper layer.  It thus needs help from
+ * the upper layer.
  */
 static int ext4_freeze(struct super_block *sb)
 {
@@ -4608,17 +4644,30 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
 
 static int ext4_quota_off(struct super_block *sb, int type)
 {
+	struct inode *inode = sb_dqopt(sb)->files[type];
+	handle_t *handle;
+
 	/* Force all delayed allocation blocks to be allocated.
 	 * Caller already holds s_umount sem */
 	if (test_opt(sb, DELALLOC))
 		sync_filesystem(sb);
 
+	/* Update modification times of quota files when userspace can
+	 * start looking at them */
+	handle = ext4_journal_start(inode, 1);
+	if (IS_ERR(handle))
+		goto out;
+	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	ext4_mark_inode_dirty(handle, inode);
+	ext4_journal_stop(handle);
+
+out:
 	return dquot_quota_off(sb, type);
 }
 
 /* Read data from quotafile - avoid pagecache and such because we cannot afford
  * acquiring the locks... As quota files are never truncated and quota code
- * itself serializes the operations (and noone else should touch the files)
+ * itself serializes the operations (and no one else should touch the files)
  * we don't have to be afraid of races */
 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
 			       size_t len, loff_t off)
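
The quota-off hunk bumps the quota file's timestamps inside a one-credit transaction but deliberately falls through to dquot_quota_off() even when the journal start fails: the timestamp update is best effort, the quota disable is not. A userspace sketch of that structure, using stand-in helpers rather than the ext4 API:

    #include <stdio.h>
    #include <time.h>

    struct handle { int ok; };

    static struct handle *journal_start(void) { static struct handle h = {1}; return &h; }
    static void journal_stop(struct handle *h) { (void)h; }

    static int quota_off(time_t *mtime)
    {
            struct handle *h = journal_start();
            if (!h || !h->ok)
                    goto out;          /* best effort: still turn quota off */
            *mtime = time(NULL);       /* userspace can now see a fresh stamp */
            journal_stop(h);
    out:
            return 0;                  /* the real quota_off happens regardless */
    }

    int main(void)
    {
            time_t m = 0;
            quota_off(&m);
            printf("quota file mtime now %ld\n", (long)m);
            return 0;
    }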
@@ -4708,9 +4757,8 @@ out:
 	if (inode->i_size < off + len) {
 		i_size_write(inode, off + len);
 		EXT4_I(inode)->i_disksize = inode->i_size;
+		ext4_mark_inode_dirty(handle, inode);
 	}
-	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-	ext4_mark_inode_dirty(handle, inode);
 	mutex_unlock(&inode->i_mutex);
 	return len;
 }
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index fc32176eee39..b545ca1c459c 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -735,7 +735,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 			int offset = (char *)s->here - bs->bh->b_data;
 
 			unlock_buffer(bs->bh);
-			jbd2_journal_release_buffer(handle, bs->bh);
+			ext4_handle_release_buffer(handle, bs->bh);
 			if (ce) {
 				mb_cache_entry_release(ce);
 				ce = NULL;
@@ -833,7 +833,7 @@ inserted:
 			new_bh = sb_getblk(sb, block);
 			if (!new_bh) {
 getblk_failed:
-				ext4_free_blocks(handle, inode, 0, block, 1,
+				ext4_free_blocks(handle, inode, NULL, block, 1,
 						 EXT4_FREE_BLOCKS_METADATA);
 				error = -EIO;
 				goto cleanup;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 0e277ec4b612..8d68690bdcf1 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -236,7 +236,6 @@ static const struct address_space_operations fat_aops = {
 	.readpages	= fat_readpages,
 	.writepage	= fat_writepage,
 	.writepages	= fat_writepages,
-	.sync_page	= block_sync_page,
 	.write_begin	= fat_write_begin,
 	.write_end	= fat_write_end,
 	.direct_IO	= fat_direct_IO,
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 6c82e5bac039..22764c7c8382 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -159,7 +159,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
 
 	/* O_NOATIME can only be set by the owner or superuser */
 	if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
-		if (!is_owner_or_cap(inode))
+		if (!inode_owner_or_capable(inode))
 			return -EPERM;
 
 	/* required for strict SunOS emulation */
diff --git a/fs/fhandle.c b/fs/fhandle.c
index bf93ad2bee07..6b088641f5bf 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -7,6 +7,7 @@
 #include <linux/exportfs.h>
 #include <linux/fs_struct.h>
 #include <linux/fsnotify.h>
+#include <linux/personality.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
diff --git a/fs/fifo.c b/fs/fifo.c
index 4e303c22d5ee..b1a524d798e7 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -66,8 +66,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
 			/* suppress POLLHUP until we have
 			 * seen a writer */
 			filp->f_version = pipe->w_counter;
-		} else
-		{
+		} else {
 			wait_for_partner(inode, &pipe->w_counter);
 			if(signal_pending(current))
 				goto err_rd;
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 751d6b255a12..0845f84f2a5f 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -110,14 +110,13 @@ int unregister_filesystem(struct file_system_type * fs)
 			*tmp = fs->next;
 			fs->next = NULL;
 			write_unlock(&file_systems_lock);
+			synchronize_rcu();
 			return 0;
 		}
 		tmp = &(*tmp)->next;
 	}
 	write_unlock(&file_systems_lock);
 
-	synchronize_rcu();
-
 	return -EINVAL;
 }
 
diff --git a/fs/freevxfs/vxfs_fshead.c b/fs/freevxfs/vxfs_fshead.c
index 78948b4b1894..c9a6a94e58e9 100644
--- a/fs/freevxfs/vxfs_fshead.c
+++ b/fs/freevxfs/vxfs_fshead.c
@@ -164,7 +164,7 @@ vxfs_read_fshead(struct super_block *sbp)
 		goto out_free_pfp;
 	}
 	if (!VXFS_ISILT(VXFS_INO(infp->vsi_stilist))) {
-		printk(KERN_ERR "vxfs: structual list inode is of wrong type (%x)\n",
+		printk(KERN_ERR "vxfs: structural list inode is of wrong type (%x)\n",
 		       VXFS_INO(infp->vsi_stilist)->vii_mode & VXFS_TYPE_MASK);
 		goto out_iput_stilist;
 	}
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c
index 6c5131d592f0..3360f1e678ad 100644
--- a/fs/freevxfs/vxfs_lookup.c
+++ b/fs/freevxfs/vxfs_lookup.c
@@ -162,7 +162,7 @@ vxfs_find_entry(struct inode *ip, struct dentry *dp, struct page **ppp)
 /**
  * vxfs_inode_by_name - find inode number for dentry
  * @dip:	directory to search in
- * @dp:		dentry we seach for
+ * @dp:		dentry we search for
  *
  * Description:
  *   vxfs_inode_by_name finds out the inode number of
diff --git a/fs/freevxfs/vxfs_olt.h b/fs/freevxfs/vxfs_olt.h
index d8324296486f..b7b3af502615 100644
--- a/fs/freevxfs/vxfs_olt.h
+++ b/fs/freevxfs/vxfs_olt.h
@@ -60,7 +60,7 @@ enum {
  *
  * The Object Location Table header is placed at the beginning of each
  * OLT extent.  It is used to fing certain filesystem-wide metadata, e.g.
- * the inital inode list, the fileset header or the device configuration.
+ * the initial inode list, the fileset header or the device configuration.
  */
 struct vxfs_olt {
 	u_int32_t	olt_magic;	/* magic number			*/
diff --git a/fs/freevxfs/vxfs_subr.c b/fs/freevxfs/vxfs_subr.c
index 1429f3ae1e86..5d318c44f855 100644
--- a/fs/freevxfs/vxfs_subr.c
+++ b/fs/freevxfs/vxfs_subr.c
@@ -44,7 +44,6 @@ static sector_t vxfs_bmap(struct address_space *, sector_t);
 const struct address_space_operations vxfs_aops = {
 	.readpage =		vxfs_readpage,
 	.bmap =			vxfs_bmap,
-	.sync_page =		block_sync_page,
 };
 
 inline void
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 59c6e4956786..34591ee804b5 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -144,7 +144,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
  *
  * Description:
  *   This does WB_SYNC_NONE opportunistic writeback. The IO is only
- *   started when this function returns, we make no guarentees on
+ *   started when this function returns, we make no guarantees on
  *   completion. Caller need not hold sb s_umount semaphore.
  *
  */
@@ -176,6 +176,17 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi)
 }
 
 /*
+ * Remove the inode from the writeback list it is on.
+ */
+void inode_wb_list_del(struct inode *inode)
+{
+	spin_lock(&inode_wb_list_lock);
+	list_del_init(&inode->i_wb_list);
+	spin_unlock(&inode_wb_list_lock);
+}
+
+
+/*
  * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
  * furthest end of its superblock's dirty-inode list.
  *
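
Everything that follows in this file replaces the old global inode_lock with two narrower locks: inode_wb_list_lock for the writeback lists and the per-inode i_lock for i_state, taken in that order and dropped in reverse. A pthread sketch of the same discipline, with illustrative names:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t wb_list_lock = PTHREAD_MUTEX_INITIALIZER;

    struct inode { pthread_mutex_t i_lock; int state; };

    static void writeback_one(struct inode *inode)
    {
            pthread_mutex_lock(&wb_list_lock);      /* outer: protects the list  */
            pthread_mutex_lock(&inode->i_lock);     /* inner: protects i_state   */
            inode->state |= 1;                      /* e.g. set I_SYNC           */
            pthread_mutex_unlock(&inode->i_lock);
            pthread_mutex_unlock(&wb_list_lock);    /* drop both before the I/O  */
    }

    int main(void)
    {
            struct inode ino = { PTHREAD_MUTEX_INITIALIZER, 0 };
            writeback_one(&ino);
            printf("state=%d\n", ino.state);
            return 0;
    }

Keeping one global acquisition order is what lets the hunks below drop and retake the pair around blocking calls without risking an ABBA deadlock.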
@@ -188,6 +199,7 @@ static void redirty_tail(struct inode *inode)
 {
 	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
 
+	assert_spin_locked(&inode_wb_list_lock);
 	if (!list_empty(&wb->b_dirty)) {
 		struct inode *tail;
 
@@ -205,14 +217,17 @@ static void requeue_io(struct inode *inode)
 {
 	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
 
+	assert_spin_locked(&inode_wb_list_lock);
 	list_move(&inode->i_wb_list, &wb->b_more_io);
 }
 
 static void inode_sync_complete(struct inode *inode)
 {
 	/*
-	 * Prevent speculative execution through spin_unlock(&inode_lock);
+	 * Prevent speculative execution through
+	 * spin_unlock(&inode_wb_list_lock);
 	 */
+
 	smp_mb();
 	wake_up_bit(&inode->i_state, __I_SYNC);
 }
@@ -286,6 +301,7 @@ static void move_expired_inodes(struct list_head *delaying_queue,
  */
 static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
 {
+	assert_spin_locked(&inode_wb_list_lock);
 	list_splice_init(&wb->b_more_io, &wb->b_io);
 	move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
 }
@@ -306,25 +322,25 @@ static void inode_wait_for_writeback(struct inode *inode)
 	wait_queue_head_t *wqh;
 
 	wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
 	while (inode->i_state & I_SYNC) {
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode->i_lock);
+		spin_unlock(&inode_wb_list_lock);
 		__wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
-		spin_lock(&inode_lock);
+		spin_lock(&inode_wb_list_lock);
+		spin_lock(&inode->i_lock);
 	}
 }
 
 /*
- * Write out an inode's dirty pages.  Called under inode_lock.  Either the
- * caller has ref on the inode (either via __iget or via syscall against an fd)
- * or the inode has I_WILL_FREE set (via generic_forget_inode)
+ * Write out an inode's dirty pages.  Called under inode_wb_list_lock and
+ * inode->i_lock.  Either the caller has an active reference on the inode or
+ * the inode has I_WILL_FREE set.
 *
 * If `wait' is set, wait on the writeout.
 *
 * The whole writeout design is quite complex and fragile.  We want to avoid
 * starvation of particular inodes when others are being redirtied, prevent
 * livelocks, etc.
- *
- * Called under inode_lock.
 */
static int
writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
@@ -333,6 +349,9 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	unsigned dirty;
 	int ret;
 
+	assert_spin_locked(&inode_wb_list_lock);
+	assert_spin_locked(&inode->i_lock);
+
 	if (!atomic_read(&inode->i_count))
 		WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
 	else
@@ -363,7 +382,8 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	/* Set I_SYNC, reset I_DIRTY_PAGES */
 	inode->i_state |= I_SYNC;
 	inode->i_state &= ~I_DIRTY_PAGES;
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&inode_wb_list_lock);
 
 	ret = do_writepages(mapping, wbc);
 
@@ -383,10 +403,10 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	 * due to delalloc, clear dirty metadata flags right before
 	 * write_inode()
 	 */
-	spin_lock(&inode_lock);
+	spin_lock(&inode->i_lock);
 	dirty = inode->i_state & I_DIRTY;
 	inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode->i_lock);
 	/* Don't write the inode if only I_DIRTY_PAGES was set */
 	if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
 		int err = write_inode(inode, wbc);
@@ -394,7 +414,8 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 			ret = err;
 	}
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_wb_list_lock);
+	spin_lock(&inode->i_lock);
 	inode->i_state &= ~I_SYNC;
 	if (!(inode->i_state & I_FREEING)) {
 		if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
@@ -506,7 +527,9 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
 		 * kind does not need peridic writeout yet, and for the latter
 		 * kind writeout is handled by the freer.
 		 */
+		spin_lock(&inode->i_lock);
 		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
+			spin_unlock(&inode->i_lock);
 			requeue_io(inode);
 			continue;
 		}
@@ -515,10 +538,13 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
 		 * Was this inode dirtied after sync_sb_inodes was called?
 		 * This keeps sync from extra jobs and livelock.
 		 */
-		if (inode_dirtied_after(inode, wbc->wb_start))
+		if (inode_dirtied_after(inode, wbc->wb_start)) {
+			spin_unlock(&inode->i_lock);
 			return 1;
+		}
 
 		__iget(inode);
+
 		pages_skipped = wbc->pages_skipped;
 		writeback_single_inode(inode, wbc);
 		if (wbc->pages_skipped != pages_skipped) {
@@ -528,10 +554,11 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
 			 */
 			redirty_tail(inode);
 		}
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode->i_lock);
+		spin_unlock(&inode_wb_list_lock);
 		iput(inode);
 		cond_resched();
-		spin_lock(&inode_lock);
+		spin_lock(&inode_wb_list_lock);
 		if (wbc->nr_to_write <= 0) {
 			wbc->more_io = 1;
 			return 1;
@@ -550,7 +577,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
 
 	if (!wbc->wb_start)
 		wbc->wb_start = jiffies; /* livelock avoidance */
-	spin_lock(&inode_lock);
+	spin_lock(&inode_wb_list_lock);
 	if (!wbc->for_kupdate || list_empty(&wb->b_io))
 		queue_io(wb, wbc->older_than_this);
 
@@ -568,7 +595,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
 		if (ret)
 			break;
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_wb_list_lock);
 	/* Leave any unwritten inodes on b_io */
 }
 
@@ -577,11 +604,11 @@ static void __writeback_inodes_sb(struct super_block *sb,
 {
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_wb_list_lock);
 	if (!wbc->for_kupdate || list_empty(&wb->b_io))
 		queue_io(wb, wbc->older_than_this);
 	writeback_sb_inodes(sb, wb, wbc, true);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_wb_list_lock);
 }
 
 /*
@@ -720,13 +747,15 @@ static long wb_writeback(struct bdi_writeback *wb,
 		 * become available for writeback. Otherwise
 		 * we'll just busyloop.
 		 */
-		spin_lock(&inode_lock);
+		spin_lock(&inode_wb_list_lock);
 		if (!list_empty(&wb->b_more_io))  {
 			inode = wb_inode(wb->b_more_io.prev);
 			trace_wbc_writeback_wait(&wbc, wb->bdi);
+			spin_lock(&inode->i_lock);
 			inode_wait_for_writeback(inode);
+			spin_unlock(&inode->i_lock);
 		}
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode_wb_list_lock);
 	}
 
 	return wrote;
@@ -992,7 +1021,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 {
 	struct super_block *sb = inode->i_sb;
 	struct backing_dev_info *bdi = NULL;
-	bool wakeup_bdi = false;
 
 	/*
 	 * Don't do this for I_DIRTY_PAGES - that doesn't actually
@@ -1016,7 +1044,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 	if (unlikely(block_dump))
 		block_dump___mark_inode_dirty(inode);
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode->i_lock);
 	if ((inode->i_state & flags) != flags) {
 		const int was_dirty = inode->i_state & I_DIRTY;
 
@@ -1028,7 +1056,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 		 * superblock list, based upon its state.
 		 */
 		if (inode->i_state & I_SYNC)
-			goto out;
+			goto out_unlock_inode;
 
 		/*
 		 * Only add valid (hashed) inodes to the superblock's
@@ -1036,16 +1064,17 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 		 */
 		if (!S_ISBLK(inode->i_mode)) {
 			if (inode_unhashed(inode))
-				goto out;
+				goto out_unlock_inode;
 		}
 		if (inode->i_state & I_FREEING)
-			goto out;
+			goto out_unlock_inode;
 
 		/*
 		 * If the inode was already on b_dirty/b_io/b_more_io, don't
 		 * reposition it (that would break b_dirty time-ordering).
 		 */
 		if (!was_dirty) {
+			bool wakeup_bdi = false;
 			bdi = inode_to_bdi(inode);
 
 			if (bdi_cap_writeback_dirty(bdi)) {
@@ -1062,15 +1091,20 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 					wakeup_bdi = true;
 			}
 
+			spin_unlock(&inode->i_lock);
+			spin_lock(&inode_wb_list_lock);
 			inode->dirtied_when = jiffies;
 			list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
+			spin_unlock(&inode_wb_list_lock);
+
+			if (wakeup_bdi)
+				bdi_wakeup_thread_delayed(bdi);
+			return;
 		}
 	}
-out:
-	spin_unlock(&inode_lock);
+out_unlock_inode:
+	spin_unlock(&inode->i_lock);
 
-	if (wakeup_bdi)
-		bdi_wakeup_thread_delayed(bdi);
 }
 EXPORT_SYMBOL(__mark_inode_dirty);
 
@@ -1101,7 +1135,7 @@ static void wait_sb_inodes(struct super_block *sb)
 	 */
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 
 	/*
 	 * Data integrity sync. Must wait for all pages under writeback,
@@ -1111,22 +1145,25 @@ static void wait_sb_inodes(struct super_block *sb)
 	 * we still have to wait for that writeout.
 	 */
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
-		struct address_space *mapping;
+		struct address_space *mapping = inode->i_mapping;
 
-		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
-			continue;
-		mapping = inode->i_mapping;
-		if (mapping->nrpages == 0)
+		spin_lock(&inode->i_lock);
+		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
+		    (mapping->nrpages == 0)) {
+			spin_unlock(&inode->i_lock);
 			continue;
+		}
 		__iget(inode);
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode->i_lock);
+		spin_unlock(&inode_sb_list_lock);
+
 		/*
-		 * We hold a reference to 'inode' so it couldn't have
-		 * been removed from s_inodes list while we dropped the
-		 * inode_lock.  We cannot iput the inode now as we can
-		 * be holding the last reference and we cannot iput it
-		 * under inode_lock. So we keep the reference and iput
-		 * it later.
+		 * We hold a reference to 'inode' so it couldn't have been
+		 * removed from s_inodes list while we dropped the
+		 * inode_sb_list_lock.  We cannot iput the inode now as we can
+		 * be holding the last reference and we cannot iput it under
+		 * inode_sb_list_lock. So we keep the reference and iput it
+		 * later.
		 */
 		iput(old_inode);
 		old_inode = inode;
@@ -1135,9 +1172,9 @@ static void wait_sb_inodes(struct super_block *sb)
 
 		cond_resched();
 
-		spin_lock(&inode_lock);
+		spin_lock(&inode_sb_list_lock);
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 	iput(old_inode);
 }
 
@@ -1271,9 +1308,11 @@ int write_inode_now(struct inode *inode, int sync)
 	wbc.nr_to_write = 0;
 
 	might_sleep();
-	spin_lock(&inode_lock);
+	spin_lock(&inode_wb_list_lock);
+	spin_lock(&inode->i_lock);
 	ret = writeback_single_inode(inode, &wbc);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&inode_wb_list_lock);
 	if (sync)
 		inode_sync_wait(inode);
 	return ret;
@@ -1295,9 +1334,11 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	int ret;
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_wb_list_lock);
+	spin_lock(&inode->i_lock);
 	ret = writeback_single_inode(inode, wbc);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&inode_wb_list_lock);
 	return ret;
 }
 EXPORT_SYMBOL(sync_inode);
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 7c39b885f969..b6cca47f7b07 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -305,7 +305,7 @@ static void cuse_gendev_release(struct device *dev)
 static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
 {
 	struct cuse_conn *cc = fc_to_cc(fc);
-	struct cuse_init_out *arg = &req->misc.cuse_init_out;
+	struct cuse_init_out *arg = req->out.args[0].value;
 	struct page *page = req->pages[0];
 	struct cuse_devinfo devinfo = { };
 	struct device *dev;
@@ -384,6 +384,7 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
 	dev_set_uevent_suppress(dev, 0);
 	kobject_uevent(&dev->kobj, KOBJ_ADD);
 out:
+	kfree(arg);
 	__free_page(page);
 	return;
 
@@ -405,6 +406,7 @@ static int cuse_send_init(struct cuse_conn *cc)
 	struct page *page;
 	struct fuse_conn *fc = &cc->fc;
 	struct cuse_init_in *arg;
+	void *outarg;
 
 	BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE);
 
@@ -419,6 +421,10 @@ static int cuse_send_init(struct cuse_conn *cc)
 	if (!page)
 		goto err_put_req;
 
+	outarg = kzalloc(sizeof(struct cuse_init_out), GFP_KERNEL);
+	if (!outarg)
+		goto err_free_page;
+
 	arg = &req->misc.cuse_init_in;
 	arg->major = FUSE_KERNEL_VERSION;
 	arg->minor = FUSE_KERNEL_MINOR_VERSION;
@@ -429,7 +435,7 @@ static int cuse_send_init(struct cuse_conn *cc)
 	req->in.args[0].value = arg;
 	req->out.numargs = 2;
 	req->out.args[0].size = sizeof(struct cuse_init_out);
-	req->out.args[0].value = &req->misc.cuse_init_out;
+	req->out.args[0].value = outarg;
 	req->out.args[1].size = CUSE_INIT_INFO_MAX;
 	req->out.argvar = 1;
 	req->out.argpages = 1;
@@ -440,6 +446,8 @@ static int cuse_send_init(struct cuse_conn *cc)
 
 	return 0;
 
+err_free_page:
+	__free_page(page);
 err_put_req:
 	fuse_put_request(fc, req);
 err:
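
The cuse change moves the init reply buffer out of the request body into its own kzalloc()'d allocation, owned by the request until the completion handler frees it, with a matching err_free_page unwind label on the submit side. The general allocate/attach/free-in-callback shape, reduced to plain C with hypothetical names:

    #include <stdlib.h>

    struct reply { void *out; };

    static void process_reply(struct reply *r)
    {
            /* ... consume r->out ... */
            free(r->out);           /* mirrors kfree(arg) in the completion path */
    }

    static int send_init(struct reply *r)
    {
            r->out = calloc(1, 64); /* mirrors kzalloc(sizeof(struct cuse_init_out)) */
            if (!r->out)
                    return -1;      /* unwind: nothing else attached yet */
            process_reply(r);       /* in the kernel this runs asynchronously */
            return 0;
    }

    int main(void) { struct reply r; return send_init(&r); }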
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index cf8d28d1fbad..640fc229df10 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -737,14 +737,12 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
 	if (WARN_ON(PageMlocked(oldpage)))
 		goto out_fallback_unlock;
 
-	remove_from_page_cache(oldpage);
-	page_cache_release(oldpage);
-
-	err = add_to_page_cache_locked(newpage, mapping, index, GFP_KERNEL);
+	err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
 	if (err) {
-		printk(KERN_WARNING "fuse_try_move_page: failed to add page");
-		goto out_fallback_unlock;
+		unlock_page(newpage);
+		return err;
 	}
+
 	page_cache_get(newpage);
 
 	if (!(buf->flags & PIPE_BUF_FLAG_LRU))
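
fuse_try_move_page() now swaps the new page in with a single replace operation instead of a remove followed by an add, so no lookup can land in the window where the offset maps to no page. A toy model of why the combined operation is the point, hypothetical and reduced to one slot:

    #include <stdio.h>

    static void *slot;                      /* stands in for one page-cache slot */

    static int replace_page(void *oldp, void *newp)
    {
            if (slot != oldp)
                    return -1;              /* lookup failed */
            slot = newp;                    /* remove and insert as one step */
            return 0;
    }

    int main(void)
    {
            int a, b;
            slot = &a;
            if (replace_page(&a, &b) == 0)
                    printf("no reader ever saw an empty slot\n");
            return 0;
    }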
@@ -1910,6 +1908,21 @@ __acquires(fc->lock)
1910 kfree(dequeue_forget(fc, 1, NULL)); 1908 kfree(dequeue_forget(fc, 1, NULL));
1911} 1909}
1912 1910
1911static void end_polls(struct fuse_conn *fc)
1912{
1913 struct rb_node *p;
1914
1915 p = rb_first(&fc->polled_files);
1916
1917 while (p) {
1918 struct fuse_file *ff;
1919 ff = rb_entry(p, struct fuse_file, polled_node);
1920 wake_up_interruptible_all(&ff->poll_wait);
1921
1922 p = rb_next(p);
1923 }
1924}
1925
1913/* 1926/*
1914 * Abort all requests. 1927 * Abort all requests.
1915 * 1928 *
@@ -1937,6 +1950,7 @@ void fuse_abort_conn(struct fuse_conn *fc)
1937 fc->blocked = 0; 1950 fc->blocked = 0;
1938 end_io_requests(fc); 1951 end_io_requests(fc);
1939 end_queued_requests(fc); 1952 end_queued_requests(fc);
1953 end_polls(fc);
1940 wake_up_all(&fc->waitq); 1954 wake_up_all(&fc->waitq);
1941 wake_up_all(&fc->blocked_waitq); 1955 wake_up_all(&fc->blocked_waitq);
1942 kill_fasync(&fc->fasync, SIGIO, POLL_IN); 1956 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
@@ -1953,6 +1967,7 @@ int fuse_dev_release(struct inode *inode, struct file *file)
1953 fc->connected = 0; 1967 fc->connected = 0;
1954 fc->blocked = 0; 1968 fc->blocked = 0;
1955 end_queued_requests(fc); 1969 end_queued_requests(fc);
1970 end_polls(fc);
1956 wake_up_all(&fc->blocked_waitq); 1971 wake_up_all(&fc->blocked_waitq);
1957 spin_unlock(&fc->lock); 1972 spin_unlock(&fc->lock);
1958 fuse_conn_put(fc); 1973 fuse_conn_put(fc);
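The new end_polls() is a plain rbtree walk; the same rb_first()/rb_next()/rb_entry() idiom works for any tree of waiters. A self-contained sketch with illustrative demo_* names (struct demo_waiter stands in for struct fuse_file):

#include <linux/rbtree.h>
#include <linux/wait.h>

struct demo_waiter {
        struct rb_node node;            /* linked into a per-connection tree */
        wait_queue_head_t wq;
};

static void demo_wake_all(struct rb_root *root)
{
        struct rb_node *p;

        for (p = rb_first(root); p; p = rb_next(p)) {
                struct demo_waiter *w = rb_entry(p, struct demo_waiter, node);

                /* poll() sleepers wake up, re-check, and see the dead connection */
                wake_up_interruptible_all(&w->wq);
        }
}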
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 8bd0ef9286c3..c6ba49bd95b3 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -158,10 +158,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
 {
         struct inode *inode;
 
-        if (nd && nd->flags & LOOKUP_RCU)
-                return -ECHILD;
-
-        inode = entry->d_inode;
+        inode = ACCESS_ONCE(entry->d_inode);
         if (inode && is_bad_inode(inode))
                 return 0;
         else if (fuse_dentry_time(entry) < get_jiffies_64()) {
@@ -177,6 +174,9 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
                 if (!inode)
                         return 0;
 
+                if (nd->flags & LOOKUP_RCU)
+                        return -ECHILD;
+
                 fc = get_fuse_conn(inode);
                 req = fuse_get_req(fc);
                 if (IS_ERR(req))
@@ -970,6 +970,14 @@ static int fuse_access(struct inode *inode, int mask)
         return err;
 }
 
+static int fuse_perm_getattr(struct inode *inode, int flags)
+{
+        if (flags & IPERM_FLAG_RCU)
+                return -ECHILD;
+
+        return fuse_do_getattr(inode, NULL, NULL);
+}
+
 /*
  * Check permission. The two basic access models of FUSE are:
  *
@@ -989,9 +997,6 @@ static int fuse_permission(struct inode *inode, int mask, unsigned int flags)
         bool refreshed = false;
         int err = 0;
 
-        if (flags & IPERM_FLAG_RCU)
-                return -ECHILD;
-
         if (!fuse_allow_task(fc, current))
                 return -EACCES;
 
@@ -1000,9 +1005,15 @@ static int fuse_permission(struct inode *inode, int mask, unsigned int flags)
          */
         if ((fc->flags & FUSE_DEFAULT_PERMISSIONS) ||
             ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
-                err = fuse_update_attributes(inode, NULL, NULL, &refreshed);
-                if (err)
-                        return err;
+                struct fuse_inode *fi = get_fuse_inode(inode);
+
+                if (fi->i_time < get_jiffies_64()) {
+                        refreshed = true;
+
+                        err = fuse_perm_getattr(inode, flags);
+                        if (err)
+                                return err;
+                }
         }
 
         if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
@@ -1012,7 +1023,7 @@ static int fuse_permission(struct inode *inode, int mask, unsigned int flags)
                    attributes. This is also needed, because the root
                    node will at first have no permissions */
                 if (err == -EACCES && !refreshed) {
-                        err = fuse_do_getattr(inode, NULL, NULL);
+                        err = fuse_perm_getattr(inode, flags);
                         if (!err)
                                 err = generic_permission(inode, mask,
                                                          flags, NULL);
@@ -1023,13 +1034,16 @@ static int fuse_permission(struct inode *inode, int mask, unsigned int flags)
                    noticed immediately, only after the attribute
                    timeout has expired */
         } else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
+                if (flags & IPERM_FLAG_RCU)
+                        return -ECHILD;
+
                 err = fuse_access(inode, mask);
         } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
                 if (!(inode->i_mode & S_IXUGO)) {
                         if (refreshed)
                                 return -EACCES;
 
-                        err = fuse_do_getattr(inode, NULL, NULL);
+                        err = fuse_perm_getattr(inode, flags);
                         if (!err && !(inode->i_mode & S_IXUGO))
                                 return -EACCES;
                 }
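The dir.c hunks push the RCU-walk bailout down to the points that would actually block: under LOOKUP_RCU/IPERM_FLAG_RCU nothing may sleep, so a path needing a round trip to the userspace server returns -ECHILD and the VFS retries in ref-walk mode. A tiny sketch of the convention (demo_revalidate is illustrative, not the fuse function):

#include <linux/errno.h>

static int demo_revalidate(int rcu_walk, int needs_server_round_trip)
{
        if (!needs_server_round_trip)
                return 1;               /* still valid, no blocking work */
        if (rcu_walk)
                return -ECHILD;         /* caller drops to ref-walk and retries */
        /* ... blocking refresh would go here ... */
        return 1;
}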
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 9e0832dbb1e3..82a66466a24c 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -222,7 +222,7 @@ static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode)
                 rb_erase(&ff->polled_node, &fc->polled_files);
                 spin_unlock(&fc->lock);
 
-                wake_up_interruptible_sync(&ff->poll_wait);
+                wake_up_interruptible_all(&ff->poll_wait);
 
                 inarg->fh = ff->fh;
                 inarg->flags = flags;
@@ -523,7 +523,7 @@ static int fuse_readpage(struct file *file, struct page *page)
                 goto out;
 
         /*
-         * Page writeback can extend beyond the liftime of the
+         * Page writeback can extend beyond the lifetime of the
          * page-cache page, so make sure we read a properly synced
          * page.
          */
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index d4286947bc2c..b788becada76 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -272,7 +272,6 @@ struct fuse_req {
                 struct fuse_init_in init_in;
                 struct fuse_init_out init_out;
                 struct cuse_init_in cuse_init_in;
-                struct cuse_init_out cuse_init_out;
                 struct {
                         struct fuse_read_in in;
                         u64 attr_ver;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 051b1a084528..cc6ec4b2f0ff 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -870,7 +870,6 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
 
         fc->bdi.name = "fuse";
         fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
-        fc->bdi.unplug_io_fn = default_unplug_io_fn;
         /* fuse does it's own writeback accounting */
         fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
 
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index 06c48a891832..8f26d1a58912 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -74,7 +74,7 @@ generic_acl_set(struct dentry *dentry, const char *name, const void *value,
                 return -EINVAL;
         if (S_ISLNK(inode->i_mode))
                 return -EOPNOTSUPP;
-        if (!is_owner_or_cap(inode))
+        if (!inode_owner_or_capable(inode))
                 return -EPERM;
         if (value) {
                 acl = posix_acl_from_xattr(value, size);
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index 21f7e46da4c0..f3d23ef4e876 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,4 +1,4 @@
-EXTRA_CFLAGS := -I$(src)
+ccflags-y := -I$(src)
 obj-$(CONFIG_GFS2_FS) += gfs2.o
 gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \
         glops.o inode.o log.o lops.o main.o meta_io.o \
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index aad77e4f61b5..0f5c4f9d5d62 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -884,8 +884,8 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
         }
 
         brelse(dibh);
-        gfs2_trans_end(sdp);
 failed:
+        gfs2_trans_end(sdp);
         if (al) {
                 gfs2_inplace_release(ip);
                 gfs2_quota_unlock(ip);
@@ -1117,7 +1117,6 @@ static const struct address_space_operations gfs2_writeback_aops = {
         .writepages = gfs2_writeback_writepages,
         .readpage = gfs2_readpage,
         .readpages = gfs2_readpages,
-        .sync_page = block_sync_page,
         .write_begin = gfs2_write_begin,
         .write_end = gfs2_write_end,
         .bmap = gfs2_bmap,
@@ -1133,7 +1132,6 @@ static const struct address_space_operations gfs2_ordered_aops = {
         .writepage = gfs2_ordered_writepage,
         .readpage = gfs2_readpage,
         .readpages = gfs2_readpages,
-        .sync_page = block_sync_page,
         .write_begin = gfs2_write_begin,
         .write_end = gfs2_write_end,
         .set_page_dirty = gfs2_set_page_dirty,
@@ -1151,7 +1149,6 @@ static const struct address_space_operations gfs2_jdata_aops = {
         .writepages = gfs2_jdata_writepages,
         .readpage = gfs2_readpage,
         .readpages = gfs2_readpages,
-        .sync_page = block_sync_page,
         .write_begin = gfs2_write_begin,
         .write_end = gfs2_write_end,
         .set_page_dirty = gfs2_set_page_dirty,
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index ef3dc4b9fae2..74add2ddcc3f 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1136,7 +1136,7 @@ void gfs2_trim_blocks(struct inode *inode)
  * earlier versions of GFS2 have a bug in the stuffed file reading
  * code which will result in a buffer overrun if the size is larger
  * than the max stuffed file size. In order to prevent this from
- * occuring, such files are unstuffed, but in other cases we can
+ * occurring, such files are unstuffed, but in other cases we can
  * just update the inode size directly.
  *
  * Returns: 0 on success, or -ve on error
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 5c356d09c321..f789c5732b7c 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1506,7 +1506,7 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name)
                 inode = gfs2_inode_lookup(dir->i_sb,
                                 be16_to_cpu(dent->de_type),
                                 be64_to_cpu(dent->de_inum.no_addr),
-                                be64_to_cpu(dent->de_inum.no_formal_ino));
+                                be64_to_cpu(dent->de_inum.no_formal_ino), 0);
                 brelse(bh);
                 return inode;
         }
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 4074b952b059..e48310885c48 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -221,7 +221,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
                 goto out_drop_write;
 
         error = -EACCES;
-        if (!is_owner_or_cap(inode))
+        if (!inode_owner_or_capable(inode))
                 goto out;
 
         error = 0;
@@ -617,18 +617,51 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
         return generic_file_aio_write(iocb, iov, nr_segs, pos);
 }
 
-static void empty_write_end(struct page *page, unsigned from,
-                            unsigned to)
+static int empty_write_end(struct page *page, unsigned from,
+                           unsigned to, int mode)
 {
-        struct gfs2_inode *ip = GFS2_I(page->mapping->host);
+        struct inode *inode = page->mapping->host;
+        struct gfs2_inode *ip = GFS2_I(inode);
+        struct buffer_head *bh;
+        unsigned offset, blksize = 1 << inode->i_blkbits;
+        pgoff_t end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT;
 
         zero_user(page, from, to-from);
         mark_page_accessed(page);
 
-        if (!gfs2_is_writeback(ip))
-                gfs2_page_add_databufs(ip, page, from, to);
+        if (page->index < end_index || !(mode & FALLOC_FL_KEEP_SIZE)) {
+                if (!gfs2_is_writeback(ip))
+                        gfs2_page_add_databufs(ip, page, from, to);
+
+                block_commit_write(page, from, to);
+                return 0;
+        }
+
+        offset = 0;
+        bh = page_buffers(page);
+        while (offset < to) {
+                if (offset >= from) {
+                        set_buffer_uptodate(bh);
+                        mark_buffer_dirty(bh);
+                        clear_buffer_new(bh);
+                        write_dirty_buffer(bh, WRITE);
+                }
+                offset += blksize;
+                bh = bh->b_this_page;
+        }
 
-        block_commit_write(page, from, to);
+        offset = 0;
+        bh = page_buffers(page);
+        while (offset < to) {
+                if (offset >= from) {
+                        wait_on_buffer(bh);
+                        if (!buffer_uptodate(bh))
+                                return -EIO;
+                }
+                offset += blksize;
+                bh = bh->b_this_page;
+        }
+        return 0;
 }
 
 static int needs_empty_write(sector_t block, struct inode *inode)
@@ -643,7 +676,8 @@ static int needs_empty_write(sector_t block, struct inode *inode)
         return !buffer_mapped(&bh_map);
 }
 
-static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
+static int write_empty_blocks(struct page *page, unsigned from, unsigned to,
+                              int mode)
 {
         struct inode *inode = page->mapping->host;
         unsigned start, end, next, blksize;
@@ -668,7 +702,9 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
                                            gfs2_block_map);
                         if (unlikely(ret))
                                 return ret;
-                        empty_write_end(page, start, end);
+                        ret = empty_write_end(page, start, end, mode);
+                        if (unlikely(ret))
+                                return ret;
                         end = 0;
                 }
                 start = next;
@@ -682,7 +718,9 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
                 ret = __block_write_begin(page, start, end - start, gfs2_block_map);
                 if (unlikely(ret))
                         return ret;
-                empty_write_end(page, start, end);
+                ret = empty_write_end(page, start, end, mode);
+                if (unlikely(ret))
+                        return ret;
         }
 
         return 0;
@@ -731,7 +769,7 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
 
                 if (curr == end)
                         to = end_offset;
-                error = write_empty_blocks(page, from, to);
+                error = write_empty_blocks(page, from, to, mode);
                 if (!error && offset + to > inode->i_size &&
                     !(mode & FALLOC_FL_KEEP_SIZE)) {
                         i_size_write(inode, offset + to);
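For blocks beyond i_size under FALLOC_FL_KEEP_SIZE, empty_write_end() now pushes the zeroed buffers out synchronously in two passes: start every write first, then wait for each, instead of serialising write+wait per buffer. A sketch of that pattern, assuming the 2.6.38-era write_dirty_buffer()/WRITE interface; note that b_this_page is a circular list, so the loop terminates when it returns to the head:

#include <linux/buffer_head.h>

static int demo_write_buffers_sync(struct page *page)
{
        struct buffer_head *head = page_buffers(page);
        struct buffer_head *bh = head;

        do {                            /* pass 1: queue all the writes */
                set_buffer_uptodate(bh);
                mark_buffer_dirty(bh);
                write_dirty_buffer(bh, WRITE);
                bh = bh->b_this_page;   /* circular: ends back at head */
        } while (bh != head);

        bh = head;
        do {                            /* pass 2: wait for completion */
                wait_on_buffer(bh);
                if (!buffer_uptodate(bh))
                        return -EIO;    /* a write failed */
                bh = bh->b_this_page;
        } while (bh != head);

        return 0;
}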
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index e2431313491f..7a4fb630a320 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -93,14 +93,12 @@ static unsigned int gl_hash(const struct gfs2_sbd *sdp,
 
 static inline void spin_lock_bucket(unsigned int hash)
 {
-        struct hlist_bl_head *bl = &gl_hash_table[hash];
-        bit_spin_lock(0, (unsigned long *)bl);
+        hlist_bl_lock(&gl_hash_table[hash]);
 }
 
 static inline void spin_unlock_bucket(unsigned int hash)
 {
-        struct hlist_bl_head *bl = &gl_hash_table[hash];
-        __bit_spin_unlock(0, (unsigned long *)bl);
+        hlist_bl_unlock(&gl_hash_table[hash]);
 }
 
 static void gfs2_glock_dealloc(struct rcu_head *rcu)
@@ -1123,7 +1121,7 @@ void gfs2_glock_dq_uninit(struct gfs2_holder *gh)
  * @number: the lock number
  * @glops: the glock operations for the type of glock
  * @state: the state to acquire the glock in
- * @flags: modifier flags for the aquisition
+ * @flags: modifier flags for the acquisition
  * @gh: the struct gfs2_holder
  *
  * Returns: errno
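The glock.c hunk above replaces open-coded bit spinlocks with the hlist_bl helpers, which wrap exactly the same bit-0 lock on the bucket head pointer. A minimal sketch (demo_* names are illustrative; hlist_bl_lock/hlist_bl_unlock are the real <linux/list_bl.h> API):

#include <linux/list_bl.h>

static inline void demo_lock_bucket(struct hlist_bl_head *b)
{
        hlist_bl_lock(b);       /* bit_spin_lock(0, (unsigned long *)b) inside */
}

static inline void demo_unlock_bucket(struct hlist_bl_head *b)
{
        hlist_bl_unlock(b);
}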
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 3754e3cbf02b..25eeb2bcee47 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -385,6 +385,10 @@ static int trans_go_demote_ok(const struct gfs2_glock *gl)
 static void iopen_go_callback(struct gfs2_glock *gl)
 {
         struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object;
+        struct gfs2_sbd *sdp = gl->gl_sbd;
+
+        if (sdp->sd_vfs->s_flags & MS_RDONLY)
+                return;
 
         if (gl->gl_demote_state == LM_ST_UNLOCKED &&
             gl->gl_state == LM_ST_SHARED && ip) {
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 97d54a28776a..9134dcb89479 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -40,37 +40,61 @@ struct gfs2_inum_range_host {
         u64 ir_length;
 };
 
+struct gfs2_skip_data {
+        u64 no_addr;
+        int skipped;
+        int non_block;
+};
+
 static int iget_test(struct inode *inode, void *opaque)
 {
         struct gfs2_inode *ip = GFS2_I(inode);
-        u64 *no_addr = opaque;
+        struct gfs2_skip_data *data = opaque;
 
-        if (ip->i_no_addr == *no_addr)
+        if (ip->i_no_addr == data->no_addr) {
+                if (data->non_block &&
+                    inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
+                        data->skipped = 1;
+                        return 0;
+                }
                 return 1;
-
+        }
         return 0;
 }
 
 static int iget_set(struct inode *inode, void *opaque)
 {
         struct gfs2_inode *ip = GFS2_I(inode);
-        u64 *no_addr = opaque;
+        struct gfs2_skip_data *data = opaque;
 
-        inode->i_ino = (unsigned long)*no_addr;
-        ip->i_no_addr = *no_addr;
+        if (data->skipped)
+                return -ENOENT;
+        inode->i_ino = (unsigned long)(data->no_addr);
+        ip->i_no_addr = data->no_addr;
         return 0;
 }
 
 struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr)
 {
         unsigned long hash = (unsigned long)no_addr;
-        return ilookup5(sb, hash, iget_test, &no_addr);
+        struct gfs2_skip_data data;
+
+        data.no_addr = no_addr;
+        data.skipped = 0;
+        data.non_block = 0;
+        return ilookup5(sb, hash, iget_test, &data);
 }
 
-static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr)
+static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr,
+                               int non_block)
 {
+        struct gfs2_skip_data data;
         unsigned long hash = (unsigned long)no_addr;
-        return iget5_locked(sb, hash, iget_test, iget_set, &no_addr);
+
+        data.no_addr = no_addr;
+        data.skipped = 0;
+        data.non_block = non_block;
+        return iget5_locked(sb, hash, iget_test, iget_set, &data);
 }
 
 /**
@@ -111,19 +135,20 @@ static void gfs2_set_iop(struct inode *inode)
  * @sb: The super block
  * @no_addr: The inode number
  * @type: The type of the inode
+ * non_block: Can we block on inodes that are being freed?
  *
  * Returns: A VFS inode, or an error
  */
 
 struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
-                                u64 no_addr, u64 no_formal_ino)
+                                u64 no_addr, u64 no_formal_ino, int non_block)
 {
         struct inode *inode;
         struct gfs2_inode *ip;
         struct gfs2_glock *io_gl = NULL;
         int error;
 
-        inode = gfs2_iget(sb, no_addr);
+        inode = gfs2_iget(sb, no_addr, non_block);
         ip = GFS2_I(inode);
 
         if (!inode)
@@ -185,11 +210,12 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
 {
         struct super_block *sb = sdp->sd_vfs;
         struct gfs2_holder i_gh;
-        struct inode *inode;
+        struct inode *inode = NULL;
         int error;
 
+        /* Must not read in block until block type is verified */
         error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops,
-                                  LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
+                                  LM_ST_EXCLUSIVE, GL_SKIP, &i_gh);
         if (error)
                 return ERR_PTR(error);
 
@@ -197,7 +223,7 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
         if (error)
                 goto fail;
 
-        inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0);
+        inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0, 1);
         if (IS_ERR(inode))
                 goto fail;
 
@@ -843,7 +869,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
                 goto fail_gunlock2;
 
         inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr,
-                                  inum.no_formal_ino);
+                                  inum.no_formal_ino, 0);
         if (IS_ERR(inode))
                 goto fail_gunlock2;
 
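The gfs2_skip_data change demonstrates a general iget5_locked() idiom: the opaque pointer carries a search key plus out-parameters, so the test callback can both match an inode and veto ones that are being freed, and the set callback can abort the insertion. A hedged sketch with hypothetical demo_* names (in iget5_locked(), a set() callback that returns nonzero makes the lookup fail and return NULL):

#include <linux/fs.h>

struct demo_key {
        u64 no_addr;
        int skipped;
};

static int demo_test(struct inode *inode, void *opaque)
{
        struct demo_key *key = opaque;

        if (inode->i_ino != (unsigned long)key->no_addr)
                return 0;
        if (inode->i_state & (I_FREEING | I_WILL_FREE)) {
                key->skipped = 1;       /* seen, but unusable */
                return 0;
        }
        return 1;
}

static int demo_set(struct inode *inode, void *opaque)
{
        struct demo_key *key = opaque;

        if (key->skipped)
                return -ENOENT;         /* abort instead of aliasing a dying inode */
        inode->i_ino = (unsigned long)key->no_addr;
        return 0;
}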
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 3e00a66e7cbd..099ca305e518 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -97,7 +97,8 @@ err:
 }
 
 extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
-                                       u64 no_addr, u64 no_formal_ino);
+                                       u64 no_addr, u64 no_formal_ino,
+                                       int non_block);
 extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
                                          u64 *no_formal_ino,
                                          unsigned int blktype);
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index e7ed31f858dd..5b102c1887fd 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -121,7 +121,7 @@ __acquires(&sdp->sd_ail_lock)
                 lock_buffer(bh);
                 if (test_clear_buffer_dirty(bh)) {
                         bh->b_end_io = end_buffer_write_sync;
-                        submit_bh(WRITE_SYNC_PLUG, bh);
+                        submit_bh(WRITE_SYNC, bh);
                 } else {
                         unlock_buffer(bh);
                         brelse(bh);
@@ -647,7 +647,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp)
                 lock_buffer(bh);
                 if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) {
                         bh->b_end_io = end_buffer_write_sync;
-                        submit_bh(WRITE_SYNC_PLUG, bh);
+                        submit_bh(WRITE_SYNC, bh);
                 } else {
                         unlock_buffer(bh);
                         brelse(bh);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index e919abf25ecd..51d27f00ebb4 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -204,7 +204,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
         }
 
         gfs2_log_unlock(sdp);
-        submit_bh(WRITE_SYNC_PLUG, bh);
+        submit_bh(WRITE_SYNC, bh);
         gfs2_log_lock(sdp);
 
         n = 0;
@@ -214,7 +214,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
                 gfs2_log_unlock(sdp);
                 lock_buffer(bd2->bd_bh);
                 bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
-                submit_bh(WRITE_SYNC_PLUG, bh);
+                submit_bh(WRITE_SYNC, bh);
                 gfs2_log_lock(sdp);
                 if (++n >= num)
                         break;
@@ -356,7 +356,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
                 sdp->sd_log_num_revoke--;
 
                 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
-                        submit_bh(WRITE_SYNC_PLUG, bh);
+                        submit_bh(WRITE_SYNC, bh);
 
                         bh = gfs2_log_get_buf(sdp);
                         mh = (struct gfs2_meta_header *)bh->b_data;
@@ -373,7 +373,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
         }
         gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
 
-        submit_bh(WRITE_SYNC_PLUG, bh);
+        submit_bh(WRITE_SYNC, bh);
 }
 
 static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
@@ -575,7 +575,7 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
         ptr = bh_log_ptr(bh);
 
         get_bh(bh);
-        submit_bh(WRITE_SYNC_PLUG, bh);
+        submit_bh(WRITE_SYNC, bh);
         gfs2_log_lock(sdp);
         while(!list_empty(list)) {
                 bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list);
@@ -601,7 +601,7 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
                 } else {
                         bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh);
                 }
-                submit_bh(WRITE_SYNC_PLUG, bh1);
+                submit_bh(WRITE_SYNC, bh1);
                 gfs2_log_lock(sdp);
                 ptr += 2;
         }
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 01d97f486553..675349b5a133 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -37,7 +37,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
         struct buffer_head *bh, *head;
         int nr_underway = 0;
         int write_op = REQ_META |
-                (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC_PLUG : WRITE);
+                (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
 
         BUG_ON(!PageLocked(page));
         BUG_ON(!page_has_buffers(page));
@@ -94,7 +94,6 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
 const struct address_space_operations gfs2_meta_aops = {
         .writepage = gfs2_aspace_writepage,
         .releasepage = gfs2_releasepage,
-        .sync_page = block_sync_page,
 };
 
 /**
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 42ef24355afb..d3c69eb91c74 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -430,7 +430,7 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
         struct dentry *dentry;
         struct inode *inode;
 
-        inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0);
+        inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0);
         if (IS_ERR(inode)) {
                 fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode));
                 return PTR_ERR(inode);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index cf930cd9664a..6fcae8469f6d 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -945,7 +945,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
                 /* rgblk_search can return a block < goal, so we need to
                    keep it marching forward. */
                 no_addr = block + rgd->rd_data0;
-                goal++;
+                goal = max(block + 1, goal + 1);
                 if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked)
                         continue;
                 if (no_addr == skip)
@@ -971,7 +971,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
                 found++;
 
                 /* Limit reclaim to sensible number of tasks */
-                if (found > 2*NR_CPUS)
+                if (found > NR_CPUS)
                         return;
         }
 
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index ec73ed70bae1..b9f28e66dad1 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -657,7 +657,7 @@ out:
  * @sdp: the file system
  *
  * This function flushes data and meta data for all machines by
- * aquiring the transaction log exclusively. All journals are
+ * acquiring the transaction log exclusively. All journals are
  * ensured to be in a clean state as well.
  *
  * Returns: errno
@@ -1318,15 +1318,17 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
 
 static void gfs2_evict_inode(struct inode *inode)
 {
-        struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
+        struct super_block *sb = inode->i_sb;
+        struct gfs2_sbd *sdp = sb->s_fs_info;
         struct gfs2_inode *ip = GFS2_I(inode);
         struct gfs2_holder gh;
         int error;
 
-        if (inode->i_nlink)
+        if (inode->i_nlink || (sb->s_flags & MS_RDONLY))
                 goto out;
 
-        error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+        /* Must not read inode block until block type has been verified */
+        error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh);
         if (unlikely(error)) {
                 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
                 goto out;
@@ -1336,6 +1338,12 @@ static void gfs2_evict_inode(struct inode *inode)
         if (error)
                 goto out_truncate;
 
+        if (test_bit(GIF_INVALID, &ip->i_flags)) {
+                error = gfs2_inode_refresh(ip);
+                if (error)
+                        goto out_truncate;
+        }
+
         ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
         gfs2_glock_dq_wait(&ip->i_iopen_gh);
         gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index dffb4e996643..fff16c968e67 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -150,7 +150,6 @@ static int hfs_writepages(struct address_space *mapping,
 const struct address_space_operations hfs_btree_aops = {
         .readpage = hfs_readpage,
         .writepage = hfs_writepage,
-        .sync_page = block_sync_page,
         .write_begin = hfs_write_begin,
         .write_end = generic_write_end,
         .bmap = hfs_bmap,
@@ -160,7 +159,6 @@ const struct address_space_operations hfs_btree_aops = {
 const struct address_space_operations hfs_aops = {
         .readpage = hfs_readpage,
         .writepage = hfs_writepage,
-        .sync_page = block_sync_page,
         .write_begin = hfs_write_begin,
         .write_end = generic_write_end,
         .bmap = hfs_bmap,
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index a8df651747f0..b248a6cfcad9 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -146,7 +146,6 @@ static int hfsplus_writepages(struct address_space *mapping,
 const struct address_space_operations hfsplus_btree_aops = {
         .readpage = hfsplus_readpage,
         .writepage = hfsplus_writepage,
-        .sync_page = block_sync_page,
         .write_begin = hfsplus_write_begin,
         .write_end = generic_write_end,
         .bmap = hfsplus_bmap,
@@ -156,7 +155,6 @@ const struct address_space_operations hfsplus_btree_aops = {
 const struct address_space_operations hfsplus_aops = {
         .readpage = hfsplus_readpage,
         .writepage = hfsplus_writepage,
-        .sync_page = block_sync_page,
         .write_begin = hfsplus_write_begin,
         .write_end = generic_write_end,
         .bmap = hfsplus_bmap,
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index 508ce662ce12..fbaa6690c8e0 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -47,7 +47,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
         if (err)
                 goto out;
 
-        if (!is_owner_or_cap(inode)) {
+        if (!inode_owner_or_capable(inode)) {
                 err = -EACCES;
                 goto out_drop_write;
         }
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 2dbae20450f8..9b9eb6933e43 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -119,7 +119,6 @@ static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block)
 const struct address_space_operations hpfs_aops = {
         .readpage = hpfs_readpage,
         .writepage = hpfs_writepage,
-        .sync_page = block_sync_page,
         .write_begin = hpfs_write_begin,
         .write_end = generic_write_end,
         .bmap = _hpfs_bmap
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 9885082b470f..b9eeb1cd03ff 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -332,8 +332,7 @@ static void truncate_huge_page(struct page *page)
 {
         cancel_dirty_page(page, /* No IO accounting for huge pages? */0);
         ClearPageUptodate(page);
-        remove_from_page_cache(page);
-        put_page(page);
+        delete_from_page_cache(page);
 }
 
 static void truncate_hugepages(struct inode *inode, loff_t lstart)
diff --git a/fs/inode.c b/fs/inode.c
index 9910c039f026..33c963d08ab4 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -25,6 +25,39 @@
25#include <linux/async.h> 25#include <linux/async.h>
26#include <linux/posix_acl.h> 26#include <linux/posix_acl.h>
27#include <linux/ima.h> 27#include <linux/ima.h>
28#include <linux/cred.h>
29#include "internal.h"
30
31/*
32 * inode locking rules.
33 *
34 * inode->i_lock protects:
35 * inode->i_state, inode->i_hash, __iget()
36 * inode_lru_lock protects:
37 * inode_lru, inode->i_lru
38 * inode_sb_list_lock protects:
39 * sb->s_inodes, inode->i_sb_list
40 * inode_wb_list_lock protects:
41 * bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list
42 * inode_hash_lock protects:
43 * inode_hashtable, inode->i_hash
44 *
45 * Lock ordering:
46 *
47 * inode_sb_list_lock
48 * inode->i_lock
49 * inode_lru_lock
50 *
51 * inode_wb_list_lock
52 * inode->i_lock
53 *
54 * inode_hash_lock
55 * inode_sb_list_lock
56 * inode->i_lock
57 *
58 * iunique_lock
59 * inode_hash_lock
60 */
28 61
29/* 62/*
30 * This is needed for the following functions: 63 * This is needed for the following functions:
@@ -59,6 +92,8 @@
59 92
60static unsigned int i_hash_mask __read_mostly; 93static unsigned int i_hash_mask __read_mostly;
61static unsigned int i_hash_shift __read_mostly; 94static unsigned int i_hash_shift __read_mostly;
95static struct hlist_head *inode_hashtable __read_mostly;
96static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
62 97
63/* 98/*
64 * Each inode can be on two separate lists. One is 99 * Each inode can be on two separate lists. One is
@@ -73,15 +108,10 @@ static unsigned int i_hash_shift __read_mostly;
73 */ 108 */
74 109
75static LIST_HEAD(inode_lru); 110static LIST_HEAD(inode_lru);
76static struct hlist_head *inode_hashtable __read_mostly; 111static DEFINE_SPINLOCK(inode_lru_lock);
77 112
78/* 113__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
79 * A simple spinlock to protect the list manipulations. 114__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock);
80 *
81 * NOTE! You also have to own the lock if you change
82 * the i_state of an inode while it is in use..
83 */
84DEFINE_SPINLOCK(inode_lock);
85 115
86/* 116/*
87 * iprune_sem provides exclusion between the icache shrinking and the 117 * iprune_sem provides exclusion between the icache shrinking and the
@@ -95,6 +125,14 @@ DEFINE_SPINLOCK(inode_lock);
95static DECLARE_RWSEM(iprune_sem); 125static DECLARE_RWSEM(iprune_sem);
96 126
97/* 127/*
128 * Empty aops. Can be used for the cases where the user does not
129 * define any of the address_space operations.
130 */
131const struct address_space_operations empty_aops = {
132};
133EXPORT_SYMBOL(empty_aops);
134
135/*
98 * Statistics gathering.. 136 * Statistics gathering..
99 */ 137 */
100struct inodes_stat_t inodes_stat; 138struct inodes_stat_t inodes_stat;
@@ -136,15 +174,6 @@ int proc_nr_inodes(ctl_table *table, int write,
136} 174}
137#endif 175#endif
138 176
139static void wake_up_inode(struct inode *inode)
140{
141 /*
142 * Prevent speculative execution through spin_unlock(&inode_lock);
143 */
144 smp_mb();
145 wake_up_bit(&inode->i_state, __I_NEW);
146}
147
148/** 177/**
149 * inode_init_always - perform inode structure intialisation 178 * inode_init_always - perform inode structure intialisation
150 * @sb: superblock inode belongs to 179 * @sb: superblock inode belongs to
@@ -155,7 +184,6 @@ static void wake_up_inode(struct inode *inode)
155 */ 184 */
156int inode_init_always(struct super_block *sb, struct inode *inode) 185int inode_init_always(struct super_block *sb, struct inode *inode)
157{ 186{
158 static const struct address_space_operations empty_aops;
159 static const struct inode_operations empty_iops; 187 static const struct inode_operations empty_iops;
160 static const struct file_operations empty_fops; 188 static const struct file_operations empty_fops;
161 struct address_space *const mapping = &inode->i_data; 189 struct address_space *const mapping = &inode->i_data;
@@ -335,7 +363,7 @@ static void init_once(void *foo)
335} 363}
336 364
337/* 365/*
338 * inode_lock must be held 366 * inode->i_lock must be held
339 */ 367 */
340void __iget(struct inode *inode) 368void __iget(struct inode *inode)
341{ 369{
@@ -353,23 +381,22 @@ EXPORT_SYMBOL(ihold);
353 381
354static void inode_lru_list_add(struct inode *inode) 382static void inode_lru_list_add(struct inode *inode)
355{ 383{
384 spin_lock(&inode_lru_lock);
356 if (list_empty(&inode->i_lru)) { 385 if (list_empty(&inode->i_lru)) {
357 list_add(&inode->i_lru, &inode_lru); 386 list_add(&inode->i_lru, &inode_lru);
358 inodes_stat.nr_unused++; 387 inodes_stat.nr_unused++;
359 } 388 }
389 spin_unlock(&inode_lru_lock);
360} 390}
361 391
362static void inode_lru_list_del(struct inode *inode) 392static void inode_lru_list_del(struct inode *inode)
363{ 393{
394 spin_lock(&inode_lru_lock);
364 if (!list_empty(&inode->i_lru)) { 395 if (!list_empty(&inode->i_lru)) {
365 list_del_init(&inode->i_lru); 396 list_del_init(&inode->i_lru);
366 inodes_stat.nr_unused--; 397 inodes_stat.nr_unused--;
367 } 398 }
368} 399 spin_unlock(&inode_lru_lock);
369
370static inline void __inode_sb_list_add(struct inode *inode)
371{
372 list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
373} 400}
374 401
375/** 402/**
@@ -378,15 +405,17 @@ static inline void __inode_sb_list_add(struct inode *inode)
378 */ 405 */
379void inode_sb_list_add(struct inode *inode) 406void inode_sb_list_add(struct inode *inode)
380{ 407{
381 spin_lock(&inode_lock); 408 spin_lock(&inode_sb_list_lock);
382 __inode_sb_list_add(inode); 409 list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
383 spin_unlock(&inode_lock); 410 spin_unlock(&inode_sb_list_lock);
384} 411}
385EXPORT_SYMBOL_GPL(inode_sb_list_add); 412EXPORT_SYMBOL_GPL(inode_sb_list_add);
386 413
387static inline void __inode_sb_list_del(struct inode *inode) 414static inline void inode_sb_list_del(struct inode *inode)
388{ 415{
416 spin_lock(&inode_sb_list_lock);
389 list_del_init(&inode->i_sb_list); 417 list_del_init(&inode->i_sb_list);
418 spin_unlock(&inode_sb_list_lock);
390} 419}
391 420
392static unsigned long hash(struct super_block *sb, unsigned long hashval) 421static unsigned long hash(struct super_block *sb, unsigned long hashval)
@@ -411,24 +440,15 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval)
411{ 440{
412 struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval); 441 struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval);
413 442
414 spin_lock(&inode_lock); 443 spin_lock(&inode_hash_lock);
444 spin_lock(&inode->i_lock);
415 hlist_add_head(&inode->i_hash, b); 445 hlist_add_head(&inode->i_hash, b);
416 spin_unlock(&inode_lock); 446 spin_unlock(&inode->i_lock);
447 spin_unlock(&inode_hash_lock);
417} 448}
418EXPORT_SYMBOL(__insert_inode_hash); 449EXPORT_SYMBOL(__insert_inode_hash);
419 450
420/** 451/**
421 * __remove_inode_hash - remove an inode from the hash
422 * @inode: inode to unhash
423 *
424 * Remove an inode from the superblock.
425 */
426static void __remove_inode_hash(struct inode *inode)
427{
428 hlist_del_init(&inode->i_hash);
429}
430
431/**
432 * remove_inode_hash - remove an inode from the hash 452 * remove_inode_hash - remove an inode from the hash
433 * @inode: inode to unhash 453 * @inode: inode to unhash
434 * 454 *
@@ -436,9 +456,11 @@ static void __remove_inode_hash(struct inode *inode)
436 */ 456 */
437void remove_inode_hash(struct inode *inode) 457void remove_inode_hash(struct inode *inode)
438{ 458{
439 spin_lock(&inode_lock); 459 spin_lock(&inode_hash_lock);
460 spin_lock(&inode->i_lock);
440 hlist_del_init(&inode->i_hash); 461 hlist_del_init(&inode->i_hash);
441 spin_unlock(&inode_lock); 462 spin_unlock(&inode->i_lock);
463 spin_unlock(&inode_hash_lock);
442} 464}
443EXPORT_SYMBOL(remove_inode_hash); 465EXPORT_SYMBOL(remove_inode_hash);
444 466
@@ -455,10 +477,29 @@ void end_writeback(struct inode *inode)
455} 477}
456EXPORT_SYMBOL(end_writeback); 478EXPORT_SYMBOL(end_writeback);
457 479
480/*
481 * Free the inode passed in, removing it from the lists it is still connected
482 * to. We remove any pages still attached to the inode and wait for any IO that
483 * is still in progress before finally destroying the inode.
484 *
485 * An inode must already be marked I_FREEING so that we avoid the inode being
486 * moved back onto lists if we race with other code that manipulates the lists
487 * (e.g. writeback_single_inode). The caller is responsible for setting this.
488 *
489 * An inode must already be removed from the LRU list before being evicted from
490 * the cache. This should occur atomically with setting the I_FREEING state
491 * flag, so no inodes here should ever be on the LRU when being evicted.
492 */
458static void evict(struct inode *inode) 493static void evict(struct inode *inode)
459{ 494{
460 const struct super_operations *op = inode->i_sb->s_op; 495 const struct super_operations *op = inode->i_sb->s_op;
461 496
497 BUG_ON(!(inode->i_state & I_FREEING));
498 BUG_ON(!list_empty(&inode->i_lru));
499
500 inode_wb_list_del(inode);
501 inode_sb_list_del(inode);
502
462 if (op->evict_inode) { 503 if (op->evict_inode) {
463 op->evict_inode(inode); 504 op->evict_inode(inode);
464 } else { 505 } else {
@@ -470,6 +511,15 @@ static void evict(struct inode *inode)
470 bd_forget(inode); 511 bd_forget(inode);
471 if (S_ISCHR(inode->i_mode) && inode->i_cdev) 512 if (S_ISCHR(inode->i_mode) && inode->i_cdev)
472 cd_forget(inode); 513 cd_forget(inode);
514
515 remove_inode_hash(inode);
516
517 spin_lock(&inode->i_lock);
518 wake_up_bit(&inode->i_state, __I_NEW);
519 BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
520 spin_unlock(&inode->i_lock);
521
522 destroy_inode(inode);
473} 523}
474 524
475/* 525/*
@@ -488,14 +538,6 @@ static void dispose_list(struct list_head *head)
488 list_del_init(&inode->i_lru); 538 list_del_init(&inode->i_lru);
489 539
490 evict(inode); 540 evict(inode);
491
492 spin_lock(&inode_lock);
493 __remove_inode_hash(inode);
494 __inode_sb_list_del(inode);
495 spin_unlock(&inode_lock);
496
497 wake_up_inode(inode);
498 destroy_inode(inode);
499 } 541 }
500} 542}
501 543
@@ -513,25 +555,23 @@ void evict_inodes(struct super_block *sb)
513 struct inode *inode, *next; 555 struct inode *inode, *next;
514 LIST_HEAD(dispose); 556 LIST_HEAD(dispose);
515 557
516 spin_lock(&inode_lock); 558 spin_lock(&inode_sb_list_lock);
517 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 559 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
518 if (atomic_read(&inode->i_count)) 560 if (atomic_read(&inode->i_count))
519 continue; 561 continue;
520 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) 562
563 spin_lock(&inode->i_lock);
564 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
565 spin_unlock(&inode->i_lock);
521 continue; 566 continue;
567 }
522 568
523 inode->i_state |= I_FREEING; 569 inode->i_state |= I_FREEING;
524 570 inode_lru_list_del(inode);
525 /* 571 spin_unlock(&inode->i_lock);
526 * Move the inode off the IO lists and LRU once I_FREEING is 572 list_add(&inode->i_lru, &dispose);
527 * set so that it won't get moved back on there if it is dirty.
528 */
529 list_move(&inode->i_lru, &dispose);
530 list_del_init(&inode->i_wb_list);
531 if (!(inode->i_state & (I_DIRTY | I_SYNC)))
532 inodes_stat.nr_unused--;
533 } 573 }
534 spin_unlock(&inode_lock); 574 spin_unlock(&inode_sb_list_lock);
535 575
536 dispose_list(&dispose); 576 dispose_list(&dispose);
537 577
@@ -560,31 +600,30 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
560 struct inode *inode, *next; 600 struct inode *inode, *next;
561 LIST_HEAD(dispose); 601 LIST_HEAD(dispose);
562 602
563 spin_lock(&inode_lock); 603 spin_lock(&inode_sb_list_lock);
564 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 604 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
565 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) 605 spin_lock(&inode->i_lock);
606 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
607 spin_unlock(&inode->i_lock);
566 continue; 608 continue;
609 }
567 if (inode->i_state & I_DIRTY && !kill_dirty) { 610 if (inode->i_state & I_DIRTY && !kill_dirty) {
611 spin_unlock(&inode->i_lock);
568 busy = 1; 612 busy = 1;
569 continue; 613 continue;
570 } 614 }
571 if (atomic_read(&inode->i_count)) { 615 if (atomic_read(&inode->i_count)) {
616 spin_unlock(&inode->i_lock);
572 busy = 1; 617 busy = 1;
573 continue; 618 continue;
574 } 619 }
575 620
576 inode->i_state |= I_FREEING; 621 inode->i_state |= I_FREEING;
577 622 inode_lru_list_del(inode);
578 /* 623 spin_unlock(&inode->i_lock);
579 * Move the inode off the IO lists and LRU once I_FREEING is 624 list_add(&inode->i_lru, &dispose);
580 * set so that it won't get moved back on there if it is dirty.
581 */
582 list_move(&inode->i_lru, &dispose);
583 list_del_init(&inode->i_wb_list);
584 if (!(inode->i_state & (I_DIRTY | I_SYNC)))
585 inodes_stat.nr_unused--;
586 } 625 }
587 spin_unlock(&inode_lock); 626 spin_unlock(&inode_sb_list_lock);
588 627
589 dispose_list(&dispose); 628 dispose_list(&dispose);
590 629
@@ -606,7 +645,7 @@ static int can_unuse(struct inode *inode)
606 645
607/* 646/*
608 * Scan `goal' inodes on the unused list for freeable ones. They are moved to a 647 * Scan `goal' inodes on the unused list for freeable ones. They are moved to a
609 * temporary list and then are freed outside inode_lock by dispose_list(). 648 * temporary list and then are freed outside inode_lru_lock by dispose_list().
610 * 649 *
611 * Any inodes which are pinned purely because of attached pagecache have their 650 * Any inodes which are pinned purely because of attached pagecache have their
612 * pagecache removed. If the inode has metadata buffers attached to 651 * pagecache removed. If the inode has metadata buffers attached to
@@ -627,7 +666,7 @@ static void prune_icache(int nr_to_scan)
627 unsigned long reap = 0; 666 unsigned long reap = 0;
628 667
629 down_read(&iprune_sem); 668 down_read(&iprune_sem);
630 spin_lock(&inode_lock); 669 spin_lock(&inode_lru_lock);
631 for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { 670 for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
632 struct inode *inode; 671 struct inode *inode;
633 672
@@ -637,53 +676,67 @@ static void prune_icache(int nr_to_scan)
637 inode = list_entry(inode_lru.prev, struct inode, i_lru); 676 inode = list_entry(inode_lru.prev, struct inode, i_lru);
638 677
639 /* 678 /*
679 * we are inverting the inode_lru_lock/inode->i_lock here,
680 * so use a trylock. If we fail to get the lock, just move the
681 * inode to the back of the list so we don't spin on it.
682 */
683 if (!spin_trylock(&inode->i_lock)) {
684 list_move(&inode->i_lru, &inode_lru);
685 continue;
686 }
687
688 /*
640 * Referenced or dirty inodes are still in use. Give them 689 * Referenced or dirty inodes are still in use. Give them
641 * another pass through the LRU as we canot reclaim them now. 690 * another pass through the LRU as we canot reclaim them now.
642 */ 691 */
643 if (atomic_read(&inode->i_count) || 692 if (atomic_read(&inode->i_count) ||
644 (inode->i_state & ~I_REFERENCED)) { 693 (inode->i_state & ~I_REFERENCED)) {
645 list_del_init(&inode->i_lru); 694 list_del_init(&inode->i_lru);
695 spin_unlock(&inode->i_lock);
646 inodes_stat.nr_unused--; 696 inodes_stat.nr_unused--;
647 continue; 697 continue;
648 } 698 }
649 699
650 /* recently referenced inodes get one more pass */ 700 /* recently referenced inodes get one more pass */
651 if (inode->i_state & I_REFERENCED) { 701 if (inode->i_state & I_REFERENCED) {
652 list_move(&inode->i_lru, &inode_lru);
653 inode->i_state &= ~I_REFERENCED; 702 inode->i_state &= ~I_REFERENCED;
703 list_move(&inode->i_lru, &inode_lru);
704 spin_unlock(&inode->i_lock);
654 continue; 705 continue;
655 } 706 }
656 if (inode_has_buffers(inode) || inode->i_data.nrpages) { 707 if (inode_has_buffers(inode) || inode->i_data.nrpages) {
657 __iget(inode); 708 __iget(inode);
658 spin_unlock(&inode_lock); 709 spin_unlock(&inode->i_lock);
710 spin_unlock(&inode_lru_lock);
659 if (remove_inode_buffers(inode)) 711 if (remove_inode_buffers(inode))
660 reap += invalidate_mapping_pages(&inode->i_data, 712 reap += invalidate_mapping_pages(&inode->i_data,
661 0, -1); 713 0, -1);
662 iput(inode); 714 iput(inode);
663 spin_lock(&inode_lock); 715 spin_lock(&inode_lru_lock);
664 716
665 if (inode != list_entry(inode_lru.next, 717 if (inode != list_entry(inode_lru.next,
666 struct inode, i_lru)) 718 struct inode, i_lru))
667 continue; /* wrong inode or list_empty */ 719 continue; /* wrong inode or list_empty */
668 if (!can_unuse(inode)) 720 /* avoid lock inversions with trylock */
721 if (!spin_trylock(&inode->i_lock))
722 continue;
723 if (!can_unuse(inode)) {
724 spin_unlock(&inode->i_lock);
669 continue; 725 continue;
726 }
670 } 727 }
671 WARN_ON(inode->i_state & I_NEW); 728 WARN_ON(inode->i_state & I_NEW);
672 inode->i_state |= I_FREEING; 729 inode->i_state |= I_FREEING;
730 spin_unlock(&inode->i_lock);
673 731
674 /*
675 * Move the inode off the IO lists and LRU once I_FREEING is
676 * set so that it won't get moved back on there if it is dirty.
677 */
678 list_move(&inode->i_lru, &freeable); 732 list_move(&inode->i_lru, &freeable);
679 list_del_init(&inode->i_wb_list);
680 inodes_stat.nr_unused--; 733 inodes_stat.nr_unused--;
681 } 734 }
682 if (current_is_kswapd()) 735 if (current_is_kswapd())
683 __count_vm_events(KSWAPD_INODESTEAL, reap); 736 __count_vm_events(KSWAPD_INODESTEAL, reap);
684 else 737 else
685 __count_vm_events(PGINODESTEAL, reap); 738 __count_vm_events(PGINODESTEAL, reap);
686 spin_unlock(&inode_lock); 739 spin_unlock(&inode_lru_lock);
687 740
688 dispose_list(&freeable); 741 dispose_list(&freeable);
689 up_read(&iprune_sem); 742 up_read(&iprune_sem);
@@ -732,15 +785,21 @@ static struct inode *find_inode(struct super_block *sb,
732 785
733repeat: 786repeat:
734 hlist_for_each_entry(inode, node, head, i_hash) { 787 hlist_for_each_entry(inode, node, head, i_hash) {
735 if (inode->i_sb != sb) 788 spin_lock(&inode->i_lock);
789 if (inode->i_sb != sb) {
790 spin_unlock(&inode->i_lock);
736 continue; 791 continue;
737 if (!test(inode, data)) 792 }
793 if (!test(inode, data)) {
794 spin_unlock(&inode->i_lock);
738 continue; 795 continue;
796 }
739 if (inode->i_state & (I_FREEING|I_WILL_FREE)) { 797 if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
740 __wait_on_freeing_inode(inode); 798 __wait_on_freeing_inode(inode);
741 goto repeat; 799 goto repeat;
742 } 800 }
743 __iget(inode); 801 __iget(inode);
802 spin_unlock(&inode->i_lock);
744 return inode; 803 return inode;
745 } 804 }
746 return NULL; 805 return NULL;
@@ -758,15 +817,21 @@ static struct inode *find_inode_fast(struct super_block *sb,
758 817
759repeat: 818repeat:
760 hlist_for_each_entry(inode, node, head, i_hash) { 819 hlist_for_each_entry(inode, node, head, i_hash) {
761 if (inode->i_ino != ino) 820 spin_lock(&inode->i_lock);
821 if (inode->i_ino != ino) {
822 spin_unlock(&inode->i_lock);
762 continue; 823 continue;
763 if (inode->i_sb != sb) 824 }
825 if (inode->i_sb != sb) {
826 spin_unlock(&inode->i_lock);
764 continue; 827 continue;
828 }
765 if (inode->i_state & (I_FREEING|I_WILL_FREE)) { 829 if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
766 __wait_on_freeing_inode(inode); 830 __wait_on_freeing_inode(inode);
767 goto repeat; 831 goto repeat;
768 } 832 }
769 __iget(inode); 833 __iget(inode);
834 spin_unlock(&inode->i_lock);
770 return inode; 835 return inode;
771 } 836 }
772 return NULL; 837 return NULL;
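Reassembled from the new column for readability, the lookup loop that both helpers now share looks like this (condensed sketch; the caller of find_inode_fast() still holds and releases inode_hash_lock):

repeat:
        hlist_for_each_entry(inode, node, head, i_hash) {
                spin_lock(&inode->i_lock);
                if (inode->i_ino != ino || inode->i_sb != sb) {
                        spin_unlock(&inode->i_lock);
                        continue;                       /* not ours: drop its lock */
                }
                if (inode->i_state & (I_FREEING | I_WILL_FREE)) {
                        __wait_on_freeing_inode(inode); /* drops i_lock and hash lock */
                        goto repeat;
                }
                __iget(inode);                          /* refcount bumped under i_lock */
                spin_unlock(&inode->i_lock);
                return inode;
        }
        return NULL;

Taking i_lock before testing i_state makes the I_FREEING check and the __iget() atomic with respect to eviction, which the global inode_lock used to guarantee.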
@@ -826,19 +891,26 @@ struct inode *new_inode(struct super_block *sb)
826{ 891{
827 struct inode *inode; 892 struct inode *inode;
828 893
829 spin_lock_prefetch(&inode_lock); 894 spin_lock_prefetch(&inode_sb_list_lock);
830 895
831 inode = alloc_inode(sb); 896 inode = alloc_inode(sb);
832 if (inode) { 897 if (inode) {
833 spin_lock(&inode_lock); 898 spin_lock(&inode->i_lock);
834 __inode_sb_list_add(inode);
835 inode->i_state = 0; 899 inode->i_state = 0;
836 spin_unlock(&inode_lock); 900 spin_unlock(&inode->i_lock);
901 inode_sb_list_add(inode);
837 } 902 }
838 return inode; 903 return inode;
839} 904}
840EXPORT_SYMBOL(new_inode); 905EXPORT_SYMBOL(new_inode);
841 906
907/**
908 * unlock_new_inode - clear the I_NEW state and wake up any waiters
909 * @inode: new inode to unlock
910 *
911 * Called when the inode is fully initialised to clear the new state of the
912 * inode and wake up anyone waiting for the inode to finish initialisation.
913 */
842void unlock_new_inode(struct inode *inode) 914void unlock_new_inode(struct inode *inode)
843{ 915{
844#ifdef CONFIG_DEBUG_LOCK_ALLOC 916#ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -858,51 +930,67 @@ void unlock_new_inode(struct inode *inode)
858 } 930 }
859 } 931 }
860#endif 932#endif
861 /* 933 spin_lock(&inode->i_lock);
862 * This is special! We do not need the spinlock when clearing I_NEW,
863 * because we're guaranteed that nobody else tries to do anything about
864 * the state of the inode when it is locked, as we just created it (so
865 * there can be no old holders that haven't tested I_NEW).
866 * However we must emit the memory barrier so that other CPUs reliably
867 * see the clearing of I_NEW after the other inode initialisation has
868 * completed.
869 */
870 smp_mb();
871 WARN_ON(!(inode->i_state & I_NEW)); 934 WARN_ON(!(inode->i_state & I_NEW));
872 inode->i_state &= ~I_NEW; 935 inode->i_state &= ~I_NEW;
873 wake_up_inode(inode); 936 wake_up_bit(&inode->i_state, __I_NEW);
937 spin_unlock(&inode->i_lock);
874} 938}
875EXPORT_SYMBOL(unlock_new_inode); 939EXPORT_SYMBOL(unlock_new_inode);
876 940
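With the memory-barrier trick gone, I_NEW is now cleared under i_lock and waiters are woken through the bit waitqueue. The usual consumer pattern, as a hedged sketch (myfs_iget and the field fills are hypothetical):

#include <linux/err.h>
#include <linux/fs.h>

struct inode *myfs_iget(struct super_block *sb, unsigned long ino)
{
        struct inode *inode = iget_locked(sb, ino);

        if (!inode)
                return ERR_PTR(-ENOMEM);
        if (!(inode->i_state & I_NEW))
                return inode;           /* cache hit: already initialised */

        /* ... read the on-disk inode and fill in i_mode, i_size, ops ... */

        unlock_new_inode(inode);        /* clears I_NEW, wakes __I_NEW waiters */
        return inode;
}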
877/* 941/**
878 * This is called without the inode lock held.. Be careful. 942 * iget5_locked - obtain an inode from a mounted file system
943 * @sb: super block of file system
944 * @hashval: hash value (usually inode number) to get
945 * @test: callback used for comparisons between inodes
946 * @set: callback used to initialize a new struct inode
947 * @data: opaque data pointer to pass to @test and @set
879 * 948 *
880 * We no longer cache the sb_flags in i_flags - see fs.h 949 * Search for the inode specified by @hashval and @data in the inode cache,
 881 * -- rmk@arm.uk.linux.org 950 * and if present return it with an increased reference count. This is
951 * a generalized version of iget_locked() for file systems where the inode
952 * number is not sufficient for unique identification of an inode.
953 *
954 * If the inode is not in cache, allocate a new inode and return it locked,
955 * hashed, and with the I_NEW flag set. The file system gets to fill it in
956 * before unlocking it via unlock_new_inode().
957 *
958 * Note both @test and @set are called with the inode_hash_lock held, so can't
959 * sleep.
882 */ 960 */
883static struct inode *get_new_inode(struct super_block *sb, 961struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
884 struct hlist_head *head, 962 int (*test)(struct inode *, void *),
885 int (*test)(struct inode *, void *), 963 int (*set)(struct inode *, void *), void *data)
886 int (*set)(struct inode *, void *),
887 void *data)
888{ 964{
965 struct hlist_head *head = inode_hashtable + hash(sb, hashval);
889 struct inode *inode; 966 struct inode *inode;
890 967
968 spin_lock(&inode_hash_lock);
969 inode = find_inode(sb, head, test, data);
970 spin_unlock(&inode_hash_lock);
971
972 if (inode) {
973 wait_on_inode(inode);
974 return inode;
975 }
976
891 inode = alloc_inode(sb); 977 inode = alloc_inode(sb);
892 if (inode) { 978 if (inode) {
893 struct inode *old; 979 struct inode *old;
894 980
895 spin_lock(&inode_lock); 981 spin_lock(&inode_hash_lock);
896 /* We released the lock, so.. */ 982 /* We released the lock, so.. */
897 old = find_inode(sb, head, test, data); 983 old = find_inode(sb, head, test, data);
898 if (!old) { 984 if (!old) {
899 if (set(inode, data)) 985 if (set(inode, data))
900 goto set_failed; 986 goto set_failed;
901 987
902 hlist_add_head(&inode->i_hash, head); 988 spin_lock(&inode->i_lock);
903 __inode_sb_list_add(inode);
904 inode->i_state = I_NEW; 989 inode->i_state = I_NEW;
905 spin_unlock(&inode_lock); 990 hlist_add_head(&inode->i_hash, head);
991 spin_unlock(&inode->i_lock);
992 inode_sb_list_add(inode);
993 spin_unlock(&inode_hash_lock);
906 994
907 /* Return the locked inode with I_NEW set, the 995 /* Return the locked inode with I_NEW set, the
908 * caller is responsible for filling in the contents 996 * caller is responsible for filling in the contents
@@ -915,7 +1003,7 @@ static struct inode *get_new_inode(struct super_block *sb,
915 * us. Use the old inode instead of the one we just 1003 * us. Use the old inode instead of the one we just
916 * allocated. 1004 * allocated.
917 */ 1005 */
918 spin_unlock(&inode_lock); 1006 spin_unlock(&inode_hash_lock);
919 destroy_inode(inode); 1007 destroy_inode(inode);
920 inode = old; 1008 inode = old;
921 wait_on_inode(inode); 1009 wait_on_inode(inode);
@@ -923,33 +1011,53 @@ static struct inode *get_new_inode(struct super_block *sb,
923 return inode; 1011 return inode;
924 1012
925set_failed: 1013set_failed:
926 spin_unlock(&inode_lock); 1014 spin_unlock(&inode_hash_lock);
927 destroy_inode(inode); 1015 destroy_inode(inode);
928 return NULL; 1016 return NULL;
929} 1017}
1018EXPORT_SYMBOL(iget5_locked);
930 1019
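Now that iget5_locked() is the exported entry point (the old get_new_inode() helper is folded in), a filesystem whose keys are wider than an inode number supplies the two callbacks. A sketch with hypothetical myfs types; note both callbacks run under inode_hash_lock and must not sleep:

#include <linux/fs.h>

struct myfs_inode_info {
        u64             objid;          /* the real, 64-bit identity */
        struct inode    vfs_inode;
};

static inline struct myfs_inode_info *MYFS_I(struct inode *inode)
{
        return container_of(inode, struct myfs_inode_info, vfs_inode);
}

static int myfs_itest(struct inode *inode, void *data)
{
        return MYFS_I(inode)->objid == *(u64 *)data;    /* no sleeping here */
}

static int myfs_iset(struct inode *inode, void *data)
{
        MYFS_I(inode)->objid = *(u64 *)data;
        return 0;
}

static struct inode *myfs_iget5(struct super_block *sb, u64 objid)
{
        return iget5_locked(sb, (unsigned long)objid,   /* hash value */
                            myfs_itest, myfs_iset, &objid);
}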
931/* 1020/**
932 * get_new_inode_fast is the fast path version of get_new_inode, see the 1021 * iget_locked - obtain an inode from a mounted file system
933 * comment at iget_locked for details. 1022 * @sb: super block of file system
1023 * @ino: inode number to get
1024 *
1025 * Search for the inode specified by @ino in the inode cache and if present
1026 * return it with an increased reference count. This is for file systems
1027 * where the inode number is sufficient for unique identification of an inode.
1028 *
1029 * If the inode is not in cache, allocate a new inode and return it locked,
1030 * hashed, and with the I_NEW flag set. The file system gets to fill it in
1031 * before unlocking it via unlock_new_inode().
934 */ 1032 */
935static struct inode *get_new_inode_fast(struct super_block *sb, 1033struct inode *iget_locked(struct super_block *sb, unsigned long ino)
936 struct hlist_head *head, unsigned long ino)
937{ 1034{
1035 struct hlist_head *head = inode_hashtable + hash(sb, ino);
938 struct inode *inode; 1036 struct inode *inode;
939 1037
1038 spin_lock(&inode_hash_lock);
1039 inode = find_inode_fast(sb, head, ino);
1040 spin_unlock(&inode_hash_lock);
1041 if (inode) {
1042 wait_on_inode(inode);
1043 return inode;
1044 }
1045
940 inode = alloc_inode(sb); 1046 inode = alloc_inode(sb);
941 if (inode) { 1047 if (inode) {
942 struct inode *old; 1048 struct inode *old;
943 1049
944 spin_lock(&inode_lock); 1050 spin_lock(&inode_hash_lock);
945 /* We released the lock, so.. */ 1051 /* We released the lock, so.. */
946 old = find_inode_fast(sb, head, ino); 1052 old = find_inode_fast(sb, head, ino);
947 if (!old) { 1053 if (!old) {
948 inode->i_ino = ino; 1054 inode->i_ino = ino;
949 hlist_add_head(&inode->i_hash, head); 1055 spin_lock(&inode->i_lock);
950 __inode_sb_list_add(inode);
951 inode->i_state = I_NEW; 1056 inode->i_state = I_NEW;
952 spin_unlock(&inode_lock); 1057 hlist_add_head(&inode->i_hash, head);
1058 spin_unlock(&inode->i_lock);
1059 inode_sb_list_add(inode);
1060 spin_unlock(&inode_hash_lock);
953 1061
954 /* Return the locked inode with I_NEW set, the 1062 /* Return the locked inode with I_NEW set, the
955 * caller is responsible for filling in the contents 1063 * caller is responsible for filling in the contents
@@ -962,13 +1070,14 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
962 * us. Use the old inode instead of the one we just 1070 * us. Use the old inode instead of the one we just
963 * allocated. 1071 * allocated.
964 */ 1072 */
965 spin_unlock(&inode_lock); 1073 spin_unlock(&inode_hash_lock);
966 destroy_inode(inode); 1074 destroy_inode(inode);
967 inode = old; 1075 inode = old;
968 wait_on_inode(inode); 1076 wait_on_inode(inode);
969 } 1077 }
970 return inode; 1078 return inode;
971} 1079}
1080EXPORT_SYMBOL(iget_locked);
972 1081
973/* 1082/*
974 * search the inode cache for a matching inode number. 1083 * search the inode cache for a matching inode number.
@@ -983,10 +1092,14 @@ static int test_inode_iunique(struct super_block *sb, unsigned long ino)
983 struct hlist_node *node; 1092 struct hlist_node *node;
984 struct inode *inode; 1093 struct inode *inode;
985 1094
1095 spin_lock(&inode_hash_lock);
986 hlist_for_each_entry(inode, node, b, i_hash) { 1096 hlist_for_each_entry(inode, node, b, i_hash) {
987 if (inode->i_ino == ino && inode->i_sb == sb) 1097 if (inode->i_ino == ino && inode->i_sb == sb) {
1098 spin_unlock(&inode_hash_lock);
988 return 0; 1099 return 0;
1100 }
989 } 1101 }
1102 spin_unlock(&inode_hash_lock);
990 1103
991 return 1; 1104 return 1;
992} 1105}
@@ -1016,7 +1129,6 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved)
1016 static unsigned int counter; 1129 static unsigned int counter;
1017 ino_t res; 1130 ino_t res;
1018 1131
1019 spin_lock(&inode_lock);
1020 spin_lock(&iunique_lock); 1132 spin_lock(&iunique_lock);
1021 do { 1133 do {
1022 if (counter <= max_reserved) 1134 if (counter <= max_reserved)
@@ -1024,7 +1136,6 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved)
1024 res = counter++; 1136 res = counter++;
1025 } while (!test_inode_iunique(sb, res)); 1137 } while (!test_inode_iunique(sb, res));
1026 spin_unlock(&iunique_lock); 1138 spin_unlock(&iunique_lock);
1027 spin_unlock(&inode_lock);
1028 1139
1029 return res; 1140 return res;
1030} 1141}
@@ -1032,116 +1143,50 @@ EXPORT_SYMBOL(iunique);
1032 1143
1033struct inode *igrab(struct inode *inode) 1144struct inode *igrab(struct inode *inode)
1034{ 1145{
1035 spin_lock(&inode_lock); 1146 spin_lock(&inode->i_lock);
1036 if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) 1147 if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) {
1037 __iget(inode); 1148 __iget(inode);
1038 else 1149 spin_unlock(&inode->i_lock);
1150 } else {
1151 spin_unlock(&inode->i_lock);
1039 /* 1152 /*
 1040 * Handle the case where s_op->clear_inode has not been 1153 * Handle the case where s_op->clear_inode has not been
1041 * called yet, and somebody is calling igrab 1154 * called yet, and somebody is calling igrab
1042 * while the inode is getting freed. 1155 * while the inode is getting freed.
1043 */ 1156 */
1044 inode = NULL; 1157 inode = NULL;
1045 spin_unlock(&inode_lock); 1158 }
1046 return inode; 1159 return inode;
1047} 1160}
1048EXPORT_SYMBOL(igrab); 1161EXPORT_SYMBOL(igrab);
1049 1162
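igrab() is the one safe way to go from a bare inode pointer to a pinned one, since it refuses inodes already past the I_FREEING/I_WILL_FREE point. A short sketch ('watched' is a hypothetical stashed pointer that holds no reference of its own):

static void myfs_poke(struct inode *watched)
{
        struct inode *inode = igrab(watched);

        if (!inode)
                return;         /* being torn down: must not be touched */
        /* ... the reference pins the inode against eviction ... */
        iput(inode);
}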
1050/** 1163/**
1051 * ifind - internal function, you want ilookup5() or iget5().
1052 * @sb: super block of file system to search
1053 * @head: the head of the list to search
1054 * @test: callback used for comparisons between inodes
1055 * @data: opaque data pointer to pass to @test
1056 * @wait: if true wait for the inode to be unlocked, if false do not
1057 *
1058 * ifind() searches for the inode specified by @data in the inode
1059 * cache. This is a generalized version of ifind_fast() for file systems where
1060 * the inode number is not sufficient for unique identification of an inode.
1061 *
1062 * If the inode is in the cache, the inode is returned with an incremented
1063 * reference count.
1064 *
1065 * Otherwise NULL is returned.
1066 *
1067 * Note, @test is called with the inode_lock held, so can't sleep.
1068 */
1069static struct inode *ifind(struct super_block *sb,
1070 struct hlist_head *head, int (*test)(struct inode *, void *),
1071 void *data, const int wait)
1072{
1073 struct inode *inode;
1074
1075 spin_lock(&inode_lock);
1076 inode = find_inode(sb, head, test, data);
1077 if (inode) {
1078 spin_unlock(&inode_lock);
1079 if (likely(wait))
1080 wait_on_inode(inode);
1081 return inode;
1082 }
1083 spin_unlock(&inode_lock);
1084 return NULL;
1085}
1086
1087/**
1088 * ifind_fast - internal function, you want ilookup() or iget().
1089 * @sb: super block of file system to search
1090 * @head: head of the list to search
1091 * @ino: inode number to search for
1092 *
1093 * ifind_fast() searches for the inode @ino in the inode cache. This is for
1094 * file systems where the inode number is sufficient for unique identification
1095 * of an inode.
1096 *
1097 * If the inode is in the cache, the inode is returned with an incremented
1098 * reference count.
1099 *
1100 * Otherwise NULL is returned.
1101 */
1102static struct inode *ifind_fast(struct super_block *sb,
1103 struct hlist_head *head, unsigned long ino)
1104{
1105 struct inode *inode;
1106
1107 spin_lock(&inode_lock);
1108 inode = find_inode_fast(sb, head, ino);
1109 if (inode) {
1110 spin_unlock(&inode_lock);
1111 wait_on_inode(inode);
1112 return inode;
1113 }
1114 spin_unlock(&inode_lock);
1115 return NULL;
1116}
1117
1118/**
1119 * ilookup5_nowait - search for an inode in the inode cache 1164 * ilookup5_nowait - search for an inode in the inode cache
1120 * @sb: super block of file system to search 1165 * @sb: super block of file system to search
1121 * @hashval: hash value (usually inode number) to search for 1166 * @hashval: hash value (usually inode number) to search for
1122 * @test: callback used for comparisons between inodes 1167 * @test: callback used for comparisons between inodes
1123 * @data: opaque data pointer to pass to @test 1168 * @data: opaque data pointer to pass to @test
1124 * 1169 *
1125 * ilookup5() uses ifind() to search for the inode specified by @hashval and 1170 * Search for the inode specified by @hashval and @data in the inode cache.
1126 * @data in the inode cache. This is a generalized version of ilookup() for
1127 * file systems where the inode number is not sufficient for unique
1128 * identification of an inode.
1129 *
1130 * If the inode is in the cache, the inode is returned with an incremented 1171 * If the inode is in the cache, the inode is returned with an incremented
1131 * reference count. Note, the inode lock is not waited upon so you have to be 1172 * reference count.
1132 * very careful what you do with the returned inode. You probably should be
1133 * using ilookup5() instead.
1134 * 1173 *
1135 * Otherwise NULL is returned. 1174 * Note: I_NEW is not waited upon so you have to be very careful what you do
1175 * with the returned inode. You probably should be using ilookup5() instead.
1136 * 1176 *
1137 * Note, @test is called with the inode_lock held, so can't sleep. 1177 * Note2: @test is called with the inode_hash_lock held, so can't sleep.
1138 */ 1178 */
1139struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, 1179struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
1140 int (*test)(struct inode *, void *), void *data) 1180 int (*test)(struct inode *, void *), void *data)
1141{ 1181{
1142 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1182 struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1183 struct inode *inode;
1184
1185 spin_lock(&inode_hash_lock);
1186 inode = find_inode(sb, head, test, data);
1187 spin_unlock(&inode_hash_lock);
1143 1188
1144 return ifind(sb, head, test, data, 0); 1189 return inode;
1145} 1190}
1146EXPORT_SYMBOL(ilookup5_nowait); 1191EXPORT_SYMBOL(ilookup5_nowait);
1147 1192
@@ -1152,24 +1197,24 @@ EXPORT_SYMBOL(ilookup5_nowait);
1152 * @test: callback used for comparisons between inodes 1197 * @test: callback used for comparisons between inodes
1153 * @data: opaque data pointer to pass to @test 1198 * @data: opaque data pointer to pass to @test
1154 * 1199 *
1155 * ilookup5() uses ifind() to search for the inode specified by @hashval and 1200 * Search for the inode specified by @hashval and @data in the inode cache,
1156 * @data in the inode cache. This is a generalized version of ilookup() for 1201 * and if the inode is in the cache, return the inode with an incremented
1157 * file systems where the inode number is not sufficient for unique 1202 * reference count. Waits on I_NEW before returning the inode.
1158 * identification of an inode.
1159 *
1160 * If the inode is in the cache, the inode lock is waited upon and the inode is
 1161 * returned with an incremented reference count.
1162 * 1204 *
1163 * Otherwise NULL is returned. 1205 * This is a generalized version of ilookup() for file systems where the
1206 * inode number is not sufficient for unique identification of an inode.
1164 * 1207 *
1165 * Note, @test is called with the inode_lock held, so can't sleep. 1208 * Note: @test is called with the inode_hash_lock held, so can't sleep.
1166 */ 1209 */
1167struct inode *ilookup5(struct super_block *sb, unsigned long hashval, 1210struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
1168 int (*test)(struct inode *, void *), void *data) 1211 int (*test)(struct inode *, void *), void *data)
1169{ 1212{
1170 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1213 struct inode *inode = ilookup5_nowait(sb, hashval, test, data);
1171 1214
1172 return ifind(sb, head, test, data, 1); 1215 if (inode)
1216 wait_on_inode(inode);
1217 return inode;
1173} 1218}
1174EXPORT_SYMBOL(ilookup5); 1219EXPORT_SYMBOL(ilookup5);
1175 1220
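ilookup5() is thus just ilookup5_nowait() plus a wait_on_inode(), so callers always see a fully initialised inode. A probe-only sketch reusing the hypothetical myfs_itest from the iget5_locked example above:

static int myfs_shoot_down(struct super_block *sb, u64 objid)
{
        struct inode *inode;

        inode = ilookup5(sb, (unsigned long)objid, myfs_itest, &objid);
        if (!inode)
                return -ENOENT;         /* not cached: nothing to invalidate */
        /* past I_NEW: i_mapping et al. are valid */
        invalidate_mapping_pages(inode->i_mapping, 0, -1);
        iput(inode);
        return 0;
}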
@@ -1178,91 +1223,23 @@ EXPORT_SYMBOL(ilookup5);
1178 * @sb: super block of file system to search 1223 * @sb: super block of file system to search
1179 * @ino: inode number to search for 1224 * @ino: inode number to search for
1180 * 1225 *
1181 * ilookup() uses ifind_fast() to search for the inode @ino in the inode cache. 1226 * Search for the inode @ino in the inode cache, and if the inode is in the
1182 * This is for file systems where the inode number is sufficient for unique 1227 * cache, the inode is returned with an incremented reference count.
1183 * identification of an inode.
1184 *
1185 * If the inode is in the cache, the inode is returned with an incremented
1186 * reference count.
1187 *
1188 * Otherwise NULL is returned.
1189 */ 1228 */
1190struct inode *ilookup(struct super_block *sb, unsigned long ino) 1229struct inode *ilookup(struct super_block *sb, unsigned long ino)
1191{ 1230{
1192 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1231 struct hlist_head *head = inode_hashtable + hash(sb, ino);
1193
1194 return ifind_fast(sb, head, ino);
1195}
1196EXPORT_SYMBOL(ilookup);
1197
1198/**
1199 * iget5_locked - obtain an inode from a mounted file system
1200 * @sb: super block of file system
1201 * @hashval: hash value (usually inode number) to get
1202 * @test: callback used for comparisons between inodes
1203 * @set: callback used to initialize a new struct inode
1204 * @data: opaque data pointer to pass to @test and @set
1205 *
1206 * iget5_locked() uses ifind() to search for the inode specified by @hashval
1207 * and @data in the inode cache and if present it is returned with an increased
1208 * reference count. This is a generalized version of iget_locked() for file
1209 * systems where the inode number is not sufficient for unique identification
1210 * of an inode.
1211 *
1212 * If the inode is not in cache, get_new_inode() is called to allocate a new
1213 * inode and this is returned locked, hashed, and with the I_NEW flag set. The
1214 * file system gets to fill it in before unlocking it via unlock_new_inode().
1215 *
1216 * Note both @test and @set are called with the inode_lock held, so can't sleep.
1217 */
1218struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
1219 int (*test)(struct inode *, void *),
1220 int (*set)(struct inode *, void *), void *data)
1221{
1222 struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1223 struct inode *inode; 1232 struct inode *inode;
1224 1233
1225 inode = ifind(sb, head, test, data, 1); 1234 spin_lock(&inode_hash_lock);
1226 if (inode) 1235 inode = find_inode_fast(sb, head, ino);
1227 return inode; 1236 spin_unlock(&inode_hash_lock);
1228 /*
1229 * get_new_inode() will do the right thing, re-trying the search
1230 * in case it had to block at any point.
1231 */
1232 return get_new_inode(sb, head, test, set, data);
1233}
1234EXPORT_SYMBOL(iget5_locked);
1235
1236/**
1237 * iget_locked - obtain an inode from a mounted file system
1238 * @sb: super block of file system
1239 * @ino: inode number to get
1240 *
1241 * iget_locked() uses ifind_fast() to search for the inode specified by @ino in
1242 * the inode cache and if present it is returned with an increased reference
1243 * count. This is for file systems where the inode number is sufficient for
1244 * unique identification of an inode.
1245 *
1246 * If the inode is not in cache, get_new_inode_fast() is called to allocate a
1247 * new inode and this is returned locked, hashed, and with the I_NEW flag set.
1248 * The file system gets to fill it in before unlocking it via
1249 * unlock_new_inode().
1250 */
1251struct inode *iget_locked(struct super_block *sb, unsigned long ino)
1252{
1253 struct hlist_head *head = inode_hashtable + hash(sb, ino);
1254 struct inode *inode;
1255 1237
1256 inode = ifind_fast(sb, head, ino);
1257 if (inode) 1238 if (inode)
1258 return inode; 1239 wait_on_inode(inode);
1259 /* 1240 return inode;
1260 * get_new_inode_fast() will do the right thing, re-trying the search
1261 * in case it had to block at any point.
1262 */
1263 return get_new_inode_fast(sb, head, ino);
1264} 1241}
1265EXPORT_SYMBOL(iget_locked); 1242EXPORT_SYMBOL(ilookup);
1266 1243
1267int insert_inode_locked(struct inode *inode) 1244int insert_inode_locked(struct inode *inode)
1268{ 1245{
@@ -1270,27 +1247,33 @@ int insert_inode_locked(struct inode *inode)
1270 ino_t ino = inode->i_ino; 1247 ino_t ino = inode->i_ino;
1271 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1248 struct hlist_head *head = inode_hashtable + hash(sb, ino);
1272 1249
1273 inode->i_state |= I_NEW;
1274 while (1) { 1250 while (1) {
1275 struct hlist_node *node; 1251 struct hlist_node *node;
1276 struct inode *old = NULL; 1252 struct inode *old = NULL;
1277 spin_lock(&inode_lock); 1253 spin_lock(&inode_hash_lock);
1278 hlist_for_each_entry(old, node, head, i_hash) { 1254 hlist_for_each_entry(old, node, head, i_hash) {
1279 if (old->i_ino != ino) 1255 if (old->i_ino != ino)
1280 continue; 1256 continue;
1281 if (old->i_sb != sb) 1257 if (old->i_sb != sb)
1282 continue; 1258 continue;
1283 if (old->i_state & (I_FREEING|I_WILL_FREE)) 1259 spin_lock(&old->i_lock);
1260 if (old->i_state & (I_FREEING|I_WILL_FREE)) {
1261 spin_unlock(&old->i_lock);
1284 continue; 1262 continue;
1263 }
1285 break; 1264 break;
1286 } 1265 }
1287 if (likely(!node)) { 1266 if (likely(!node)) {
1267 spin_lock(&inode->i_lock);
1268 inode->i_state |= I_NEW;
1288 hlist_add_head(&inode->i_hash, head); 1269 hlist_add_head(&inode->i_hash, head);
1289 spin_unlock(&inode_lock); 1270 spin_unlock(&inode->i_lock);
1271 spin_unlock(&inode_hash_lock);
1290 return 0; 1272 return 0;
1291 } 1273 }
1292 __iget(old); 1274 __iget(old);
1293 spin_unlock(&inode_lock); 1275 spin_unlock(&old->i_lock);
1276 spin_unlock(&inode_hash_lock);
1294 wait_on_inode(old); 1277 wait_on_inode(old);
1295 if (unlikely(!inode_unhashed(old))) { 1278 if (unlikely(!inode_unhashed(old))) {
1296 iput(old); 1279 iput(old);
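Note the hunk also moves the I_NEW marking inside insert_inode_locked(), under i_lock, rather than leaving it as a bare assignment before the loop. The create-side pattern looks roughly like this (hedged sketch, hypothetical myfs function):

static struct inode *myfs_create_inode(struct super_block *sb, unsigned long ino)
{
        struct inode *inode = new_inode(sb);

        if (!inode)
                return ERR_PTR(-ENOMEM);
        inode->i_ino = ino;
        if (insert_inode_locked(inode) < 0) {
                /* that ino is already live in the icache: back out */
                make_bad_inode(inode);
                iput(inode);
                return ERR_PTR(-EIO);
        }
        /* ... initialise and persist, then unlock_new_inode(inode) ... */
        return inode;
}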
@@ -1307,29 +1290,34 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
1307 struct super_block *sb = inode->i_sb; 1290 struct super_block *sb = inode->i_sb;
1308 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1291 struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1309 1292
1310 inode->i_state |= I_NEW;
1311
1312 while (1) { 1293 while (1) {
1313 struct hlist_node *node; 1294 struct hlist_node *node;
1314 struct inode *old = NULL; 1295 struct inode *old = NULL;
1315 1296
1316 spin_lock(&inode_lock); 1297 spin_lock(&inode_hash_lock);
1317 hlist_for_each_entry(old, node, head, i_hash) { 1298 hlist_for_each_entry(old, node, head, i_hash) {
1318 if (old->i_sb != sb) 1299 if (old->i_sb != sb)
1319 continue; 1300 continue;
1320 if (!test(old, data)) 1301 if (!test(old, data))
1321 continue; 1302 continue;
1322 if (old->i_state & (I_FREEING|I_WILL_FREE)) 1303 spin_lock(&old->i_lock);
1304 if (old->i_state & (I_FREEING|I_WILL_FREE)) {
1305 spin_unlock(&old->i_lock);
1323 continue; 1306 continue;
1307 }
1324 break; 1308 break;
1325 } 1309 }
1326 if (likely(!node)) { 1310 if (likely(!node)) {
1311 spin_lock(&inode->i_lock);
1312 inode->i_state |= I_NEW;
1327 hlist_add_head(&inode->i_hash, head); 1313 hlist_add_head(&inode->i_hash, head);
1328 spin_unlock(&inode_lock); 1314 spin_unlock(&inode->i_lock);
1315 spin_unlock(&inode_hash_lock);
1329 return 0; 1316 return 0;
1330 } 1317 }
1331 __iget(old); 1318 __iget(old);
1332 spin_unlock(&inode_lock); 1319 spin_unlock(&old->i_lock);
1320 spin_unlock(&inode_hash_lock);
1333 wait_on_inode(old); 1321 wait_on_inode(old);
1334 if (unlikely(!inode_unhashed(old))) { 1322 if (unlikely(!inode_unhashed(old))) {
1335 iput(old); 1323 iput(old);
@@ -1374,47 +1362,35 @@ static void iput_final(struct inode *inode)
1374 const struct super_operations *op = inode->i_sb->s_op; 1362 const struct super_operations *op = inode->i_sb->s_op;
1375 int drop; 1363 int drop;
1376 1364
1365 WARN_ON(inode->i_state & I_NEW);
1366
1377 if (op && op->drop_inode) 1367 if (op && op->drop_inode)
1378 drop = op->drop_inode(inode); 1368 drop = op->drop_inode(inode);
1379 else 1369 else
1380 drop = generic_drop_inode(inode); 1370 drop = generic_drop_inode(inode);
1381 1371
1372 if (!drop && (sb->s_flags & MS_ACTIVE)) {
1373 inode->i_state |= I_REFERENCED;
1374 if (!(inode->i_state & (I_DIRTY|I_SYNC)))
1375 inode_lru_list_add(inode);
1376 spin_unlock(&inode->i_lock);
1377 return;
1378 }
1379
1382 if (!drop) { 1380 if (!drop) {
1383 if (sb->s_flags & MS_ACTIVE) {
1384 inode->i_state |= I_REFERENCED;
1385 if (!(inode->i_state & (I_DIRTY|I_SYNC))) {
1386 inode_lru_list_add(inode);
1387 }
1388 spin_unlock(&inode_lock);
1389 return;
1390 }
1391 WARN_ON(inode->i_state & I_NEW);
1392 inode->i_state |= I_WILL_FREE; 1381 inode->i_state |= I_WILL_FREE;
1393 spin_unlock(&inode_lock); 1382 spin_unlock(&inode->i_lock);
1394 write_inode_now(inode, 1); 1383 write_inode_now(inode, 1);
1395 spin_lock(&inode_lock); 1384 spin_lock(&inode->i_lock);
1396 WARN_ON(inode->i_state & I_NEW); 1385 WARN_ON(inode->i_state & I_NEW);
1397 inode->i_state &= ~I_WILL_FREE; 1386 inode->i_state &= ~I_WILL_FREE;
1398 __remove_inode_hash(inode);
1399 } 1387 }
1400 1388
1401 WARN_ON(inode->i_state & I_NEW);
1402 inode->i_state |= I_FREEING; 1389 inode->i_state |= I_FREEING;
1403
1404 /*
1405 * Move the inode off the IO lists and LRU once I_FREEING is
1406 * set so that it won't get moved back on there if it is dirty.
1407 */
1408 inode_lru_list_del(inode); 1390 inode_lru_list_del(inode);
1409 list_del_init(&inode->i_wb_list); 1391 spin_unlock(&inode->i_lock);
1410 1392
1411 __inode_sb_list_del(inode);
1412 spin_unlock(&inode_lock);
1413 evict(inode); 1393 evict(inode);
1414 remove_inode_hash(inode);
1415 wake_up_inode(inode);
1416 BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
1417 destroy_inode(inode);
1418} 1394}
1419 1395
1420/** 1396/**
@@ -1431,7 +1407,7 @@ void iput(struct inode *inode)
1431 if (inode) { 1407 if (inode) {
1432 BUG_ON(inode->i_state & I_CLEAR); 1408 BUG_ON(inode->i_state & I_CLEAR);
1433 1409
1434 if (atomic_dec_and_lock(&inode->i_count, &inode_lock)) 1410 if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock))
1435 iput_final(inode); 1411 iput_final(inode);
1436 } 1412 }
1437} 1413}
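The only change to iput() is which lock atomic_dec_and_lock() takes, but the pattern is the point: the count can only reach zero with the per-inode lock held, so iput_final() runs with i_lock held and i_state stable. A generic sketch of the primitive (hypothetical my_obj):

#include <linux/slab.h>
#include <linux/spinlock.h>

struct my_obj {
        atomic_t        count;
        spinlock_t      lock;
};

static void my_put(struct my_obj *obj)
{
        /* true only for the 1 -> 0 transition, and then lock is held */
        if (!atomic_dec_and_lock(&obj->count, &obj->lock))
                return;
        /* last reference: state guarded by obj->lock cannot change now */
        spin_unlock(&obj->lock);
        kfree(obj);
}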
@@ -1610,9 +1586,8 @@ EXPORT_SYMBOL(inode_wait);
1610 * to recheck inode state. 1586 * to recheck inode state.
1611 * 1587 *
1612 * It doesn't matter if I_NEW is not set initially, a call to 1588 * It doesn't matter if I_NEW is not set initially, a call to
1613 * wake_up_inode() after removing from the hash list will DTRT. 1589 * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list
1614 * 1590 * will DTRT.
1615 * This is called with inode_lock held.
1616 */ 1591 */
1617static void __wait_on_freeing_inode(struct inode *inode) 1592static void __wait_on_freeing_inode(struct inode *inode)
1618{ 1593{
@@ -1620,10 +1595,11 @@ static void __wait_on_freeing_inode(struct inode *inode)
1620 DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); 1595 DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
1621 wq = bit_waitqueue(&inode->i_state, __I_NEW); 1596 wq = bit_waitqueue(&inode->i_state, __I_NEW);
1622 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); 1597 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
1623 spin_unlock(&inode_lock); 1598 spin_unlock(&inode->i_lock);
1599 spin_unlock(&inode_hash_lock);
1624 schedule(); 1600 schedule();
1625 finish_wait(wq, &wait.wait); 1601 finish_wait(wq, &wait.wait);
1626 spin_lock(&inode_lock); 1602 spin_lock(&inode_hash_lock);
1627} 1603}
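__wait_on_freeing_inode() now drops both i_lock and inode_hash_lock before sleeping, but the sleep itself is the stock bit-waitqueue idiom: sleeper and waker rendezvous on the hashed waitqueue keyed by the (word address, bit) pair. Sketch with a hypothetical MY_BUSY bit:

#include <linux/sched.h>
#include <linux/wait.h>

#define MY_BUSY 0       /* hypothetical bit in *state */

static void my_wait_busy(unsigned long *state)
{
        DEFINE_WAIT_BIT(wait, state, MY_BUSY);
        wait_queue_head_t *wq = bit_waitqueue(state, MY_BUSY);

        prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
        if (test_bit(MY_BUSY, state))
                schedule();
        finish_wait(wq, &wait.wait);
}

static void my_clear_busy(unsigned long *state)
{
        clear_bit(MY_BUSY, state);
        smp_mb__after_clear_bit();      /* clear must be visible before wake */
        wake_up_bit(state, MY_BUSY);    /* same (address, bit) the sleeper used */
}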
1628 1604
1629static __initdata unsigned long ihash_entries; 1605static __initdata unsigned long ihash_entries;
@@ -1715,7 +1691,7 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
1715EXPORT_SYMBOL(init_special_inode); 1691EXPORT_SYMBOL(init_special_inode);
1716 1692
1717/** 1693/**
1718 * Init uid,gid,mode for new inode according to posix standards 1694 * inode_init_owner - Init uid,gid,mode for new inode according to posix standards
1719 * @inode: New inode 1695 * @inode: New inode
1720 * @dir: Directory inode 1696 * @dir: Directory inode
1721 * @mode: mode of the new inode 1697 * @mode: mode of the new inode
@@ -1733,3 +1709,22 @@ void inode_init_owner(struct inode *inode, const struct inode *dir,
1733 inode->i_mode = mode; 1709 inode->i_mode = mode;
1734} 1710}
1735EXPORT_SYMBOL(inode_init_owner); 1711EXPORT_SYMBOL(inode_init_owner);
1712
1713/**
1714 * inode_owner_or_capable - check current task permissions to inode
1715 * @inode: inode being checked
1716 *
 1717 * Return true if current either has CAP_FOWNER in the inode's
 1718 * user namespace, or owns the file.
1719 */
1720bool inode_owner_or_capable(const struct inode *inode)
1721{
1722 struct user_namespace *ns = inode_userns(inode);
1723
1724 if (current_user_ns() == ns && current_fsuid() == inode->i_uid)
1725 return true;
1726 if (ns_capable(ns, CAP_FOWNER))
1727 return true;
1728 return false;
1729}
1730EXPORT_SYMBOL(inode_owner_or_capable);
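This helper replaces the old is_owner_or_cap() macro, as the jffs2 and jfs hunks further down show, adding the user-namespace check alongside the fsuid comparison. A typical call site, sketched (hypothetical handler name):

static int myfs_check_setflags(struct file *filp)
{
        struct inode *inode = filp->f_path.dentry->d_inode;

        if (!inode_owner_or_capable(inode))
                return -EACCES;         /* neither owner nor CAP_FOWNER */
        return 0;
}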
diff --git a/fs/internal.h b/fs/internal.h
index 17191546d527..b29c46e4e32f 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -64,6 +64,7 @@ extern int copy_mount_string(const void __user *, char **);
64 64
65extern unsigned int mnt_get_count(struct vfsmount *mnt); 65extern unsigned int mnt_get_count(struct vfsmount *mnt);
66extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int); 66extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
67extern struct vfsmount *lookup_mnt(struct path *);
67extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, 68extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *,
68 struct vfsmount *); 69 struct vfsmount *);
69extern void release_mounts(struct list_head *); 70extern void release_mounts(struct list_head *);
@@ -124,6 +125,13 @@ extern long do_handle_open(int mountdirfd,
124/* 125/*
125 * inode.c 126 * inode.c
126 */ 127 */
128extern spinlock_t inode_sb_list_lock;
129
130/*
131 * fs-writeback.c
132 */
133extern void inode_wb_list_del(struct inode *inode);
134
127extern int get_nr_dirty_inodes(void); 135extern int get_nr_dirty_inodes(void);
128extern void evict_inodes(struct super_block *); 136extern void evict_inodes(struct super_block *);
129extern int invalidate_inodes(struct super_block *, bool); 137extern int invalidate_inodes(struct super_block *, bool);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 1eebeb72b202..1d9b9fcb2db4 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -548,6 +548,7 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
548{ 548{
549 int error = 0; 549 int error = 0;
550 int __user *argp = (int __user *)arg; 550 int __user *argp = (int __user *)arg;
551 struct inode *inode = filp->f_path.dentry->d_inode;
551 552
552 switch (cmd) { 553 switch (cmd) {
553 case FIOCLEX: 554 case FIOCLEX:
@@ -567,13 +568,11 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
567 break; 568 break;
568 569
569 case FIOQSIZE: 570 case FIOQSIZE:
570 if (S_ISDIR(filp->f_path.dentry->d_inode->i_mode) || 571 if (S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode) ||
571 S_ISREG(filp->f_path.dentry->d_inode->i_mode) || 572 S_ISLNK(inode->i_mode)) {
572 S_ISLNK(filp->f_path.dentry->d_inode->i_mode)) { 573 loff_t res = inode_get_bytes(inode);
573 loff_t res = 574 error = copy_to_user(argp, &res, sizeof(res)) ?
574 inode_get_bytes(filp->f_path.dentry->d_inode); 575 -EFAULT : 0;
575 error = copy_to_user((loff_t __user *)arg, &res,
576 sizeof(res)) ? -EFAULT : 0;
577 } else 576 } else
578 error = -ENOTTY; 577 error = -ENOTTY;
579 break; 578 break;
@@ -590,14 +589,10 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
590 return ioctl_fiemap(filp, arg); 589 return ioctl_fiemap(filp, arg);
591 590
592 case FIGETBSZ: 591 case FIGETBSZ:
593 { 592 return put_user(inode->i_sb->s_blocksize, argp);
594 struct inode *inode = filp->f_path.dentry->d_inode;
595 int __user *p = (int __user *)arg;
596 return put_user(inode->i_sb->s_blocksize, p);
597 }
598 593
599 default: 594 default:
600 if (S_ISREG(filp->f_path.dentry->d_inode->i_mode)) 595 if (S_ISREG(inode->i_mode))
601 error = file_ioctl(filp, cmd, arg); 596 error = file_ioctl(filp, cmd, arg);
602 else 597 else
603 error = vfs_ioctl(filp, cmd, arg); 598 error = vfs_ioctl(filp, cmd, arg);
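The FIOQSIZE and FIGETBSZ branches keep their user-visible behaviour; only the repeated f_path.dentry->d_inode chains are hoisted into the local inode. From user space the two ioctls look like this (sketch; FIOQSIZE comes from the per-arch <asm/ioctls.h> pulled in by <sys/ioctl.h>):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <unistd.h>
#include <linux/fs.h>           /* FIGETBSZ */

int main(int argc, char **argv)
{
        int fd, bsz;
        loff_t used;

        if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
                return 1;
        if (ioctl(fd, FIGETBSZ, &bsz) == 0)
                printf("fs block size: %d\n", bsz);
        if (ioctl(fd, FIOQSIZE, &used) == 0)
                printf("bytes used: %lld\n", (long long)used);
        close(fd);
        return 0;
}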
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index a0f3833c0dbf..3db5ba4568fc 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -1158,7 +1158,6 @@ static sector_t _isofs_bmap(struct address_space *mapping, sector_t block)
1158 1158
1159static const struct address_space_operations isofs_aops = { 1159static const struct address_space_operations isofs_aops = {
1160 .readpage = isofs_readpage, 1160 .readpage = isofs_readpage,
1161 .sync_page = block_sync_page,
1162 .bmap = _isofs_bmap 1161 .bmap = _isofs_bmap
1163}; 1162};
1164 1163
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 34a4861c14b8..69b180459463 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -20,6 +20,7 @@
20#include <linux/mm.h> 20#include <linux/mm.h>
21#include <linux/pagemap.h> 21#include <linux/pagemap.h>
22#include <linux/bio.h> 22#include <linux/bio.h>
23#include <linux/blkdev.h>
23 24
24/* 25/*
25 * Default IO end handler for temporary BJ_IO buffer_heads. 26 * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -294,7 +295,7 @@ void journal_commit_transaction(journal_t *journal)
294 int first_tag = 0; 295 int first_tag = 0;
295 int tag_flag; 296 int tag_flag;
296 int i; 297 int i;
297 int write_op = WRITE_SYNC; 298 struct blk_plug plug;
298 299
299 /* 300 /*
300 * First job: lock down the current transaction and wait for 301 * First job: lock down the current transaction and wait for
@@ -327,13 +328,6 @@ void journal_commit_transaction(journal_t *journal)
327 spin_lock(&journal->j_state_lock); 328 spin_lock(&journal->j_state_lock);
328 commit_transaction->t_state = T_LOCKED; 329 commit_transaction->t_state = T_LOCKED;
329 330
330 /*
331 * Use plugged writes here, since we want to submit several before
332 * we unplug the device. We don't do explicit unplugging in here,
333 * instead we rely on sync_buffer() doing the unplug for us.
334 */
335 if (commit_transaction->t_synchronous_commit)
336 write_op = WRITE_SYNC_PLUG;
337 spin_lock(&commit_transaction->t_handle_lock); 331 spin_lock(&commit_transaction->t_handle_lock);
338 while (commit_transaction->t_updates) { 332 while (commit_transaction->t_updates) {
339 DEFINE_WAIT(wait); 333 DEFINE_WAIT(wait);
@@ -368,7 +362,7 @@ void journal_commit_transaction(journal_t *journal)
368 * we do not require it to remember exactly which old buffers it 362 * we do not require it to remember exactly which old buffers it
369 * has reserved. This is consistent with the existing behaviour 363 * has reserved. This is consistent with the existing behaviour
370 * that multiple journal_get_write_access() calls to the same 364 * that multiple journal_get_write_access() calls to the same
371 * buffer are perfectly permissable. 365 * buffer are perfectly permissible.
372 */ 366 */
373 while (commit_transaction->t_reserved_list) { 367 while (commit_transaction->t_reserved_list) {
374 jh = commit_transaction->t_reserved_list; 368 jh = commit_transaction->t_reserved_list;
@@ -418,8 +412,10 @@ void journal_commit_transaction(journal_t *journal)
418 * Now start flushing things to disk, in the order they appear 412 * Now start flushing things to disk, in the order they appear
419 * on the transaction lists. Data blocks go first. 413 * on the transaction lists. Data blocks go first.
420 */ 414 */
415 blk_start_plug(&plug);
421 err = journal_submit_data_buffers(journal, commit_transaction, 416 err = journal_submit_data_buffers(journal, commit_transaction,
422 write_op); 417 WRITE_SYNC);
418 blk_finish_plug(&plug);
423 419
424 /* 420 /*
425 * Wait for all previously submitted IO to complete. 421 * Wait for all previously submitted IO to complete.
@@ -480,7 +476,9 @@ void journal_commit_transaction(journal_t *journal)
480 err = 0; 476 err = 0;
481 } 477 }
482 478
483 journal_write_revoke_records(journal, commit_transaction, write_op); 479 blk_start_plug(&plug);
480
481 journal_write_revoke_records(journal, commit_transaction, WRITE_SYNC);
484 482
485 /* 483 /*
486 * If we found any dirty or locked buffers, then we should have 484 * If we found any dirty or locked buffers, then we should have
@@ -650,7 +648,7 @@ start_journal_io:
650 clear_buffer_dirty(bh); 648 clear_buffer_dirty(bh);
651 set_buffer_uptodate(bh); 649 set_buffer_uptodate(bh);
652 bh->b_end_io = journal_end_buffer_io_sync; 650 bh->b_end_io = journal_end_buffer_io_sync;
653 submit_bh(write_op, bh); 651 submit_bh(WRITE_SYNC, bh);
654 } 652 }
655 cond_resched(); 653 cond_resched();
656 654
@@ -661,6 +659,8 @@ start_journal_io:
661 } 659 }
662 } 660 }
663 661
662 blk_finish_plug(&plug);
663
664 /* Lo and behold: we have just managed to send a transaction to 664 /* Lo and behold: we have just managed to send a transaction to
665 the log. Before we can commit it, wait for the IO so far to 665 the log. Before we can commit it, wait for the IO so far to
666 complete. Control buffers being written are on the 666 complete. Control buffers being written are on the
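The WRITE_SYNC_PLUG flag is gone because plugging moved from the request queue to the caller's stack: I/O submitted between blk_start_plug() and blk_finish_plug() is held on a per-task list and issued as one batch at the finish. Minimal sketch of the new idiom:

#include <linux/blkdev.h>
#include <linux/buffer_head.h>

static void my_submit_batch(struct buffer_head **bhs, int nr)
{
        struct blk_plug plug;
        int i;

        blk_start_plug(&plug);
        for (i = 0; i < nr; i++)
                submit_bh(WRITE_SYNC, bhs[i]);  /* queued on the task's plug */
        blk_finish_plug(&plug);                 /* one unplug flushes the batch */
}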
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index eb11601f2e00..b3713afaaa9e 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -770,7 +770,7 @@ journal_t * journal_init_dev(struct block_device *bdev,
770 journal->j_wbufsize = n; 770 journal->j_wbufsize = n;
771 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 771 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
772 if (!journal->j_wbuf) { 772 if (!journal->j_wbuf) {
773 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 773 printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n",
774 __func__); 774 __func__);
775 goto out_err; 775 goto out_err;
776 } 776 }
@@ -831,7 +831,7 @@ journal_t * journal_init_inode (struct inode *inode)
831 journal->j_wbufsize = n; 831 journal->j_wbufsize = n;
832 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 832 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
833 if (!journal->j_wbuf) { 833 if (!journal->j_wbuf) {
834 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 834 printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n",
835 __func__); 835 __func__);
836 goto out_err; 836 goto out_err;
837 } 837 }
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index d29018307e2e..305a90763154 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -71,7 +71,7 @@
71 * switching hash tables under them. For operations on the lists of entries in 71 * switching hash tables under them. For operations on the lists of entries in
72 * the hash table j_revoke_lock is used. 72 * the hash table j_revoke_lock is used.
73 * 73 *
74 * Finally, also replay code uses the hash tables but at this moment noone else 74 * Finally, also replay code uses the hash tables but at this moment no one else
75 * can touch them (filesystem isn't mounted yet) and hence no locking is 75 * can touch them (filesystem isn't mounted yet) and hence no locking is
76 * needed. 76 * needed.
77 */ 77 */
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 5b2e4c30a2a1..60d2319651b2 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1392,7 +1392,7 @@ int journal_stop(handle_t *handle)
1392 * by 30x or more... 1392 * by 30x or more...
1393 * 1393 *
1394 * We try and optimize the sleep time against what the underlying disk 1394 * We try and optimize the sleep time against what the underlying disk
1395 * can do, instead of having a static sleep time. This is usefull for 1395 * can do, instead of having a static sleep time. This is useful for
1396 * the case where our storage is so fast that it is more optimal to go 1396 * the case where our storage is so fast that it is more optimal to go
1397 * ahead and force a flush and wait for the transaction to be committed 1397 * ahead and force a flush and wait for the transaction to be committed
1398 * than it is to wait for an arbitrary amount of time for new writers to 1398 * than it is to wait for an arbitrary amount of time for new writers to
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index f3ad1598b201..6e28000a4b21 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -105,6 +105,8 @@ static int journal_submit_commit_record(journal_t *journal,
105 int ret; 105 int ret;
106 struct timespec now = current_kernel_time(); 106 struct timespec now = current_kernel_time();
107 107
108 *cbh = NULL;
109
108 if (is_journal_aborted(journal)) 110 if (is_journal_aborted(journal))
109 return 0; 111 return 0;
110 112
@@ -137,9 +139,9 @@ static int journal_submit_commit_record(journal_t *journal,
137 if (journal->j_flags & JBD2_BARRIER && 139 if (journal->j_flags & JBD2_BARRIER &&
138 !JBD2_HAS_INCOMPAT_FEATURE(journal, 140 !JBD2_HAS_INCOMPAT_FEATURE(journal,
139 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) 141 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT))
140 ret = submit_bh(WRITE_SYNC_PLUG | WRITE_FLUSH_FUA, bh); 142 ret = submit_bh(WRITE_SYNC | WRITE_FLUSH_FUA, bh);
141 else 143 else
142 ret = submit_bh(WRITE_SYNC_PLUG, bh); 144 ret = submit_bh(WRITE_SYNC, bh);
143 145
144 *cbh = bh; 146 *cbh = bh;
145 return ret; 147 return ret;
@@ -329,7 +331,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
329 int tag_bytes = journal_tag_bytes(journal); 331 int tag_bytes = journal_tag_bytes(journal);
330 struct buffer_head *cbh = NULL; /* For transactional checksums */ 332 struct buffer_head *cbh = NULL; /* For transactional checksums */
331 __u32 crc32_sum = ~0; 333 __u32 crc32_sum = ~0;
332 int write_op = WRITE_SYNC; 334 struct blk_plug plug;
333 335
334 /* 336 /*
335 * First job: lock down the current transaction and wait for 337 * First job: lock down the current transaction and wait for
@@ -363,13 +365,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
363 write_lock(&journal->j_state_lock); 365 write_lock(&journal->j_state_lock);
364 commit_transaction->t_state = T_LOCKED; 366 commit_transaction->t_state = T_LOCKED;
365 367
366 /*
367 * Use plugged writes here, since we want to submit several before
368 * we unplug the device. We don't do explicit unplugging in here,
369 * instead we rely on sync_buffer() doing the unplug for us.
370 */
371 if (commit_transaction->t_synchronous_commit)
372 write_op = WRITE_SYNC_PLUG;
373 trace_jbd2_commit_locking(journal, commit_transaction); 368 trace_jbd2_commit_locking(journal, commit_transaction);
374 stats.run.rs_wait = commit_transaction->t_max_wait; 369 stats.run.rs_wait = commit_transaction->t_max_wait;
375 stats.run.rs_locked = jiffies; 370 stats.run.rs_locked = jiffies;
@@ -410,7 +405,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
410 * we do not require it to remember exactly which old buffers it 405 * we do not require it to remember exactly which old buffers it
411 * has reserved. This is consistent with the existing behaviour 406 * has reserved. This is consistent with the existing behaviour
412 * that multiple jbd2_journal_get_write_access() calls to the same 407 * that multiple jbd2_journal_get_write_access() calls to the same
413 * buffer are perfectly permissable. 408 * buffer are perfectly permissible.
414 */ 409 */
415 while (commit_transaction->t_reserved_list) { 410 while (commit_transaction->t_reserved_list) {
416 jh = commit_transaction->t_reserved_list; 411 jh = commit_transaction->t_reserved_list;
@@ -469,8 +464,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
469 if (err) 464 if (err)
470 jbd2_journal_abort(journal, err); 465 jbd2_journal_abort(journal, err);
471 466
467 blk_start_plug(&plug);
472 jbd2_journal_write_revoke_records(journal, commit_transaction, 468 jbd2_journal_write_revoke_records(journal, commit_transaction,
473 write_op); 469 WRITE_SYNC);
470 blk_finish_plug(&plug);
474 471
475 jbd_debug(3, "JBD: commit phase 2\n"); 472 jbd_debug(3, "JBD: commit phase 2\n");
476 473
@@ -497,6 +494,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
497 err = 0; 494 err = 0;
498 descriptor = NULL; 495 descriptor = NULL;
499 bufs = 0; 496 bufs = 0;
497 blk_start_plug(&plug);
500 while (commit_transaction->t_buffers) { 498 while (commit_transaction->t_buffers) {
501 499
502 /* Find the next buffer to be journaled... */ 500 /* Find the next buffer to be journaled... */
@@ -658,7 +656,7 @@ start_journal_io:
658 clear_buffer_dirty(bh); 656 clear_buffer_dirty(bh);
659 set_buffer_uptodate(bh); 657 set_buffer_uptodate(bh);
660 bh->b_end_io = journal_end_buffer_io_sync; 658 bh->b_end_io = journal_end_buffer_io_sync;
661 submit_bh(write_op, bh); 659 submit_bh(WRITE_SYNC, bh);
662 } 660 }
663 cond_resched(); 661 cond_resched();
664 stats.run.rs_blocks_logged += bufs; 662 stats.run.rs_blocks_logged += bufs;
@@ -699,6 +697,8 @@ start_journal_io:
699 __jbd2_journal_abort_hard(journal); 697 __jbd2_journal_abort_hard(journal);
700 } 698 }
701 699
700 blk_finish_plug(&plug);
701
702 /* Lo and behold: we have just managed to send a transaction to 702 /* Lo and behold: we have just managed to send a transaction to
703 the log. Before we can commit it, wait for the IO so far to 703 the log. Before we can commit it, wait for the IO so far to
704 complete. Control buffers being written are on the 704 complete. Control buffers being written are on the
@@ -808,7 +808,7 @@ wait_for_iobuf:
808 if (err) 808 if (err)
809 __jbd2_journal_abort_hard(journal); 809 __jbd2_journal_abort_hard(journal);
810 } 810 }
811 if (!err && !is_journal_aborted(journal)) 811 if (cbh)
812 err = journal_wait_on_commit_record(journal, cbh); 812 err = journal_wait_on_commit_record(journal, cbh);
813 if (JBD2_HAS_INCOMPAT_FEATURE(journal, 813 if (JBD2_HAS_INCOMPAT_FEATURE(journal,
814 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) && 814 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) &&
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 90407b8fece7..e0ec3db1c395 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -917,7 +917,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
917 journal->j_wbufsize = n; 917 journal->j_wbufsize = n;
918 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 918 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
919 if (!journal->j_wbuf) { 919 if (!journal->j_wbuf) {
920 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 920 printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n",
921 __func__); 921 __func__);
922 goto out_err; 922 goto out_err;
923 } 923 }
@@ -983,7 +983,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
983 journal->j_wbufsize = n; 983 journal->j_wbufsize = n;
984 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 984 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
985 if (!journal->j_wbuf) { 985 if (!journal->j_wbuf) {
986 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 986 printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n",
987 __func__); 987 __func__);
988 goto out_err; 988 goto out_err;
989 } 989 }
@@ -2413,10 +2413,12 @@ const char *jbd2_dev_to_name(dev_t device)
2413 new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL); 2413 new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
2414 if (!new_dev) 2414 if (!new_dev)
2415 return "NODEV-ALLOCFAILURE"; /* Something non-NULL */ 2415 return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
2416 bd = bdget(device);
2416 spin_lock(&devname_cache_lock); 2417 spin_lock(&devname_cache_lock);
2417 if (devcache[i]) { 2418 if (devcache[i]) {
2418 if (devcache[i]->device == device) { 2419 if (devcache[i]->device == device) {
2419 kfree(new_dev); 2420 kfree(new_dev);
2421 bdput(bd);
2420 ret = devcache[i]->devname; 2422 ret = devcache[i]->devname;
2421 spin_unlock(&devname_cache_lock); 2423 spin_unlock(&devname_cache_lock);
2422 return ret; 2424 return ret;
@@ -2425,7 +2427,6 @@ const char *jbd2_dev_to_name(dev_t device)
2425 } 2427 }
2426 devcache[i] = new_dev; 2428 devcache[i] = new_dev;
2427 devcache[i]->device = device; 2429 devcache[i]->device = device;
2428 bd = bdget(device);
2429 if (bd) { 2430 if (bd) {
2430 bdevname(bd, devcache[i]->devname); 2431 bdevname(bd, devcache[i]->devname);
2431 bdput(bd); 2432 bdput(bd);
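The point of the jbd2_dev_to_name() reordering is that bdget() may sleep, so it must run before devname_cache_lock is taken, with the race loser releasing both the allocation and the bdev reference. The general shape, sketched with hypothetical cache helpers:

new = kmalloc(sizeof(*new), GFP_KERNEL);        /* may sleep */
bd = bdget(device);                             /* may sleep: do it unlocked */
spin_lock(&my_cache_lock);
if (my_cache_hit(device)) {                     /* hypothetical lookup */
        spin_unlock(&my_cache_lock);
        kfree(new);                             /* raced: discard both */
        if (bd)
                bdput(bd);
        return;
}
/* ... install new/bd here; nothing in this region may sleep ... */
spin_unlock(&my_cache_lock);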
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 9ad321fd63fd..69fd93588118 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -71,7 +71,7 @@
71 * switching hash tables under them. For operations on the lists of entries in 71 * switching hash tables under them. For operations on the lists of entries in
72 * the hash table j_revoke_lock is used. 72 * the hash table j_revoke_lock is used.
73 * 73 *
74 * Finally, also replay code uses the hash tables but at this moment noone else 74 * Finally, also replay code uses the hash tables but at this moment no one else
75 * can touch them (filesystem isn't mounted yet) and hence no locking is 75 * can touch them (filesystem isn't mounted yet) and hence no locking is
76 * needed. 76 * needed.
77 */ 77 */
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 1d1191050f99..05fa77a23711 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1403,7 +1403,7 @@ int jbd2_journal_stop(handle_t *handle)
1403 1403
1404 /* 1404 /*
1405 * Once we drop t_updates, if it goes to zero the transaction 1405 * Once we drop t_updates, if it goes to zero the transaction
1406 * could start commiting on us and eventually disappear. So 1406 * could start committing on us and eventually disappear. So
1407 * once we do this, we must not dereference transaction 1407 * once we do this, we must not dereference transaction
1408 * pointer again. 1408 * pointer again.
1409 */ 1409 */
diff --git a/fs/jffs2/TODO b/fs/jffs2/TODO
index 5d3ea4070f01..ca28964abd4b 100644
--- a/fs/jffs2/TODO
+++ b/fs/jffs2/TODO
@@ -11,7 +11,7 @@
11 - checkpointing (do we need this? scan is quite fast) 11 - checkpointing (do we need this? scan is quite fast)
12 - make the scan code populate real inodes so read_inode just after 12 - make the scan code populate real inodes so read_inode just after
13 mount doesn't have to read the flash twice for large files. 13 mount doesn't have to read the flash twice for large files.
14 Make this a per-inode option, changable with chattr, so you can 14 Make this a per-inode option, changeable with chattr, so you can
15 decide which inodes should be in-core immediately after mount. 15 decide which inodes should be in-core immediately after mount.
16 - test, test, test 16 - test, test, test
17 17
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 95b79672150a..828a0e1ea438 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -402,7 +402,7 @@ static int jffs2_acl_setxattr(struct dentry *dentry, const char *name,
402 402
403 if (name[0] != '\0') 403 if (name[0] != '\0')
404 return -EINVAL; 404 return -EINVAL;
405 if (!is_owner_or_cap(dentry->d_inode)) 405 if (!inode_owner_or_capable(dentry->d_inode))
406 return -EPERM; 406 return -EPERM;
407 407
408 if (value) { 408 if (value) {
diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c
index fd05a0b9431d..5a001020c542 100644
--- a/fs/jffs2/compr_zlib.c
+++ b/fs/jffs2/compr_zlib.c
@@ -40,12 +40,13 @@ static z_stream inf_strm, def_strm;
40 40
41static int __init alloc_workspaces(void) 41static int __init alloc_workspaces(void)
42{ 42{
43 def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); 43 def_strm.workspace = vmalloc(zlib_deflate_workspacesize(MAX_WBITS,
44 MAX_MEM_LEVEL));
44 if (!def_strm.workspace) { 45 if (!def_strm.workspace) {
45 printk(KERN_WARNING "Failed to allocate %d bytes for deflate workspace\n", zlib_deflate_workspacesize()); 46 printk(KERN_WARNING "Failed to allocate %d bytes for deflate workspace\n", zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL));
46 return -ENOMEM; 47 return -ENOMEM;
47 } 48 }
48 D1(printk(KERN_DEBUG "Allocated %d bytes for deflate workspace\n", zlib_deflate_workspacesize())); 49 D1(printk(KERN_DEBUG "Allocated %d bytes for deflate workspace\n", zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL)));
49 inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); 50 inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
50 if (!inf_strm.workspace) { 51 if (!inf_strm.workspace) {
51 printk(KERN_WARNING "Failed to allocate %d bytes for inflate workspace\n", zlib_inflate_workspacesize()); 52 printk(KERN_WARNING "Failed to allocate %d bytes for inflate workspace\n", zlib_inflate_workspacesize());
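zlib_deflate_workspacesize() now takes the window bits and memory level, letting callers with smaller windows allocate smaller workspaces; passing MAX_WBITS and MAX_MEM_LEVEL, as jffs2 does here, keeps the old worst-case size. Sketch:

#include <linux/vmalloc.h>
#include <linux/zlib.h>

static z_stream my_strm;

static int my_alloc_deflate_workspace(void)
{
        my_strm.workspace = vmalloc(zlib_deflate_workspacesize(MAX_WBITS,
                                                               MAX_MEM_LEVEL));
        if (!my_strm.workspace)
                return -ENOMEM;
        return 0;
}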
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index d32ee9412cb9..2ab1a0d91210 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -24,7 +24,7 @@
24 * 24 *
25 * Returns: 0 if the data CRC is correct; 25 * Returns: 0 if the data CRC is correct;
26 * 1 - if incorrect; 26 * 1 - if incorrect;
27 * error code if an error occured. 27 * error code if an error occurred.
28 */ 28 */
29static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info *tn) 29static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info *tn)
30{ 30{
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index 800171dca53b..e537fb0e0184 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -121,7 +121,7 @@ int jffs2_sum_add_inode_mem(struct jffs2_summary *s, struct jffs2_raw_inode *ri,
121 temp->nodetype = ri->nodetype; 121 temp->nodetype = ri->nodetype;
122 temp->inode = ri->ino; 122 temp->inode = ri->ino;
123 temp->version = ri->version; 123 temp->version = ri->version;
124 temp->offset = cpu_to_je32(ofs); /* relative offset from the begining of the jeb */ 124 temp->offset = cpu_to_je32(ofs); /* relative offset from the beginning of the jeb */
125 temp->totlen = ri->totlen; 125 temp->totlen = ri->totlen;
126 temp->next = NULL; 126 temp->next = NULL;
127 127
@@ -139,7 +139,7 @@ int jffs2_sum_add_dirent_mem(struct jffs2_summary *s, struct jffs2_raw_dirent *r
 
 	temp->nodetype = rd->nodetype;
 	temp->totlen = rd->totlen;
-	temp->offset = cpu_to_je32(ofs);	/* relative from the begining of the jeb */
+	temp->offset = cpu_to_je32(ofs);	/* relative from the beginning of the jeb */
 	temp->pino = rd->pino;
 	temp->version = rd->version;
 	temp->ino = rd->ino;
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 07ee1546b2fa..4515bea0268f 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -1116,7 +1116,7 @@ int jffs2_write_nand_cleanmarker(struct jffs2_sb_info *c,
 
 /*
  * On NAND we try to mark this block bad. If the block was erased more
- * than MAX_ERASE_FAILURES we mark it finaly bad.
+ * than MAX_ERASE_FAILURES we mark it finally bad.
  * Don't care about failures. This block remains on the erase-pending
  * or badblock list as long as nobody manipulates the flash with
  * a bootloader or something like that.
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 4f9cc0482949..3e93cdd19005 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -31,7 +31,7 @@
  *   is used to release xattr name/value pair and detach from c->xattrindex.
  * reclaim_xattr_datum(c)
  *   is used to reclaim xattr name/value pairs on the xattr name/value pair cache when
  *   memory usage by cache is over c->xdatum_mem_threshold. Currently, this threshold
  *   is hard coded as 32KiB.
  * do_verify_xattr_datum(c, xd)
  *   is used to load the xdatum informations without name/value pair from the medium.
diff --git a/fs/jfs/Makefile b/fs/jfs/Makefile
index 3adb6395e42d..a58fa72d7e59 100644
--- a/fs/jfs/Makefile
+++ b/fs/jfs/Makefile
@@ -13,4 +13,4 @@ jfs-y := super.o file.o inode.o namei.o jfs_mount.o jfs_umount.o \
 
 jfs-$(CONFIG_JFS_POSIX_ACL) += acl.o
 
-EXTRA_CFLAGS += -D_JFS_4K
+ccflags-y := -D_JFS_4K
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 9978803ceedc..eddbb373209e 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -352,7 +352,6 @@ const struct address_space_operations jfs_aops = {
 	.readpages	= jfs_readpages,
 	.writepage	= jfs_writepage,
 	.writepages	= jfs_writepages,
-	.sync_page	= block_sync_page,
 	.write_begin	= jfs_write_begin,
 	.write_end	= nobh_write_end,
 	.bmap		= jfs_bmap,
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index afe222bf300f..6f98a1866776 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -72,7 +72,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		if (err)
 			return err;
 
-		if (!is_owner_or_cap(inode)) {
+		if (!inode_owner_or_capable(inode)) {
 			err = -EACCES;
 			goto setflags_out;
 		}
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index c92ea3b3ea5e..4496872cf4e7 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -1649,7 +1649,7 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno)
 	}
 
 	/* search the tree within the dmap control page for
-	 * sufficent free space.  if sufficient free space is found,
+	 * sufficient free space.  if sufficient free space is found,
 	 * dbFindLeaf() returns the index of the leaf at which
 	 * free space was found.
 	 */
@@ -2744,7 +2744,7 @@ static int dbJoin(dmtree_t * tp, int leafno, int newval)
 	/* check which (leafno or buddy) is the left buddy.
 	 * the left buddy gets to claim the blocks resulting
 	 * from the join while the right gets to claim none.
-	 * the left buddy is also eligable to participate in
+	 * the left buddy is also eligible to participate in
 	 * a join at the next higher level while the right
 	 * is not.
 	 *
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
index 5d3bbd10f8db..e5fe8506ed16 100644
--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -126,7 +126,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
 
 	/* allocate the disk blocks for the extent.  initially, extBalloc()
 	 * will try to allocate disk blocks for the requested size (xlen).
-	 * if this fails (xlen contiguous free blocks not avaliable), it'll
+	 * if this fails (xlen contiguous free blocks not available), it'll
 	 * try to allocate a smaller number of blocks (producing a smaller
 	 * extent), with this smaller number of blocks consisting of the
 	 * requested number of blocks rounded down to the next smaller
@@ -481,7 +481,7 @@ int extFill(struct inode *ip, xad_t * xp)
 	 *
 	 * initially, we will try to allocate disk blocks for the
 	 * requested size (nblocks).  if this fails (nblocks
-	 * contiguous free blocks not avaliable), we'll try to allocate
+	 * contiguous free blocks not available), we'll try to allocate
 	 * a smaller number of blocks (producing a smaller extent), with
 	 * this smaller number of blocks consisting of the requested
 	 * number of blocks rounded down to the next smaller power of 2
@@ -575,7 +575,7 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
 	 * to a new set of blocks.  If moving the extent, we initially
 	 * will try to allocate disk blocks for the requested size
 	 * (newnblks).  if this fails (new contiguous free blocks not
-	 * avaliable), we'll try to allocate a smaller number of
+	 * available), we'll try to allocate a smaller number of
 	 * blocks (producing a smaller extent), with this smaller
 	 * number of blocks consisting of the requested number of
 	 * blocks rounded down to the next smaller power of 2
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 3a09423b6c22..ed53a4740168 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -1069,7 +1069,7 @@ int diFree(struct inode *ip)
 	 */
 	if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) {
 		/* in preparation for removing the iag from the
-		 * ag extent free list, read the iags preceeding
+		 * ag extent free list, read the iags preceding
 		 * and following the iag on the ag extent free
 		 * list.
 		 */
@@ -1095,7 +1095,7 @@ int diFree(struct inode *ip)
 		int inofreefwd = le32_to_cpu(iagp->inofreefwd);
 
 		/* in preparation for removing the iag from the
-		 * ag inode free list, read the iags preceeding
+		 * ag inode free list, read the iags preceding
 		 * and following the iag on the ag inode free
 		 * list.  before reading these iags, we must make
 		 * sure that we already don't have them in hand
@@ -1681,7 +1681,7 @@ diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
 	 * try to allocate a new extent of free inodes.
 	 */
 	if (addext) {
-		/* if free space is not avaliable for this new extent, try
+		/* if free space is not available for this new extent, try
 		 * below to allocate a free and existing (already backed)
 		 * inode from the ag.
 		 */
@@ -2036,7 +2036,7 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
 
 	/* check if this is the last free inode within the iag.
 	 * if so, it will have to be removed from the ag free
-	 * inode list, so get the iags preceeding and following
+	 * inode list, so get the iags preceding and following
 	 * it on the list.
 	 */
 	if (iagp->nfreeinos == cpu_to_le32(1)) {
@@ -2208,7 +2208,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
 
 	/* check if this is the last free extent within the
 	 * iag.  if so, the iag must be removed from the ag
-	 * free extent list, so get the iags preceeding and
+	 * free extent list, so get the iags preceding and
 	 * following the iag on this list.
 	 */
 	if (iagp->nfreeexts == cpu_to_le32(1)) {
@@ -2504,7 +2504,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
 	}
 
 
-	/* get the next avaliable iag number */
+	/* get the next available iag number */
 	iagno = imap->im_nextiag;
 
 	/* make sure that we have not exceeded the maximum inode
@@ -2615,7 +2615,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
 
 		duplicateIXtree(sb, blkno, xlen, &xaddr);
 
-	/* update the next avaliable iag number */
+	/* update the next available iag number */
 	imap->im_nextiag += 1;
 
 	/* Add the iag to the iag free list so we don't lose the iag
diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h
index 9236bc49ae7f..e38c21598850 100644
--- a/fs/jfs/jfs_logmgr.h
+++ b/fs/jfs/jfs_logmgr.h
@@ -288,7 +288,7 @@ struct lrd {
 	/*
 	 *	SYNCPT: log sync point
 	 *
-	 *	replay log upto syncpt address specified;
+	 *	replay log up to syncpt address specified;
 	 */
 	struct {
 		__le32 sync;	/* 4: syncpt address (0 = here) */
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 48b44bd8267b..6740d34cd82b 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -583,7 +583,6 @@ static void metapage_invalidatepage(struct page *page, unsigned long offset)
 const struct address_space_operations jfs_metapage_aops = {
 	.readpage	= metapage_readpage,
 	.writepage	= metapage_writepage,
-	.sync_page	= block_sync_page,
 	.releasepage	= metapage_releasepage,
 	.invalidatepage	= metapage_invalidatepage,
 	.set_page_dirty	= __set_page_dirty_nobuffers,
diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h
index d94f8d9e87d7..a78beda85f68 100644
--- a/fs/jfs/jfs_metapage.h
+++ b/fs/jfs/jfs_metapage.h
@@ -75,7 +75,7 @@ extern void grab_metapage(struct metapage *);
 extern void force_metapage(struct metapage *);
 
 /*
- * hold_metapage and put_metapage are used in conjuction. The page lock
+ * hold_metapage and put_metapage are used in conjunction. The page lock
  * is not dropped between the two, so no other threads can get or release
  * the metapage
  */
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 9466957ec841..f6cc0c09ec63 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -636,7 +636,7 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
 	 * the inode of the page and available to all anonymous
 	 * transactions until txCommit() time at which point
 	 * they are transferred to the transaction tlock list of
-	 * the commiting transaction of the inode)
+	 * the committing transaction of the inode)
 	 */
 	if (xtid == 0) {
 		tlck->tid = tid;
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index 1aba0039f1c9..8ea5efb5a34e 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -57,7 +57,7 @@
  *  2. compute new FSCKSize from new LVSize;
  *  3. set new FSSize as MIN(FSSize, LVSize-(LogSize+FSCKSize)) where
  *     assert(new FSSize >= old FSSize),
- *     i.e., file system must not be shrinked;
+ *     i.e., file system must not be shrunk;
  */
 int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
 {
@@ -182,7 +182,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
 	 */
 	newFSSize = newLVSize - newLogSize - newFSCKSize;
 
-	/* file system cannot be shrinked */
+	/* file system cannot be shrunk */
 	if (newFSSize < bmp->db_mapsize) {
 		rc = -EINVAL;
 		goto out;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index eeca48a031ab..06c8a67cbe76 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -644,7 +644,7 @@ static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 
 /* Read data from quotafile - avoid pagecache and such because we cannot afford
  * acquiring the locks... As quota files are never truncated and quota code
- * itself serializes the operations (and noone else should touch the files)
+ * itself serializes the operations (and no one else should touch the files)
  * we don't have to be afraid of races */
 static ssize_t jfs_quota_read(struct super_block *sb, int type, char *data,
 			      size_t len, loff_t off)
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 3fa4c32272df..24838f1eeee5 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -678,7 +678,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
 	struct posix_acl *acl;
 	int rc;
 
-	if (!is_owner_or_cap(inode))
+	if (!inode_owner_or_capable(inode))
 		return -EPERM;
 
 	/*
diff --git a/fs/locks.c b/fs/locks.c
index 822c3d1843af..0a4f50dfadfb 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -414,17 +414,7 @@ static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
 	fl->fl_ops = NULL;
 	fl->fl_lmops = NULL;
 
-	switch (l->l_type) {
-	case F_RDLCK:
-	case F_WRLCK:
-	case F_UNLCK:
-		fl->fl_type = l->l_type;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return (0);
+	return assign_type(fl, l->l_type);
 }
 #endif
 
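The deleted switch duplicated validation that fs/locks.c already provides; the call now reuses the existing helper, which looks essentially like this (paraphrased from fs/locks.c of the same era, shown for reference only):

static int assign_type(struct file_lock *fl, int type)
{
	switch (type) {
	case F_RDLCK:
	case F_WRLCK:
	case F_UNLCK:
		fl->fl_type = type;
		break;
	default:
		return -EINVAL;
	}
	return 0;
}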
diff --git a/fs/logfs/compr.c b/fs/logfs/compr.c
index 44bbfd249abc..961f02b86d97 100644
--- a/fs/logfs/compr.c
+++ b/fs/logfs/compr.c
@@ -81,7 +81,7 @@ error:
 
 int __init logfs_compr_init(void)
 {
-	size_t size = max(zlib_deflate_workspacesize(),
+	size_t size = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
 			zlib_inflate_workspacesize());
 	stream.workspace = vmalloc(size);
 	if (!stream.workspace)
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 723bc5bca09a..1adc8d455f0e 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -39,7 +39,6 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw)
 	bio.bi_end_io = request_complete;
 
 	submit_bio(rw, &bio);
-	generic_unplug_device(bdev_get_queue(bdev));
 	wait_for_completion(&complete);
 	return test_bit(BIO_UPTODATE, &bio.bi_flags) ? 0 : -EIO;
 }
@@ -168,7 +167,6 @@ static void bdev_writeseg(struct super_block *sb, u64 ofs, size_t len)
 	}
 	len = PAGE_ALIGN(len);
 	__bdev_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT);
-	generic_unplug_device(bdev_get_queue(logfs_super(sb)->s_bdev));
 }
 
 
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index 7466e9dcc8c5..339e17e9133d 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -60,7 +60,7 @@ static int mtd_write(struct super_block *sb, loff_t ofs, size_t len, void *buf)
  * asynchronous properties. So just to prevent the first implementor of such
  * a thing from breaking logfs in 2350, we do the usual pointless dance to
  * declare a completion variable and wait for completion before returning
- * from mtd_erase(). What an excercise in futility!
+ * from mtd_erase(). What an exercise in futility!
  */
 static void logfs_erase_callback(struct erase_info *ei)
 {
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index f9ddf0c388c8..9ed89d1663f8 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -92,7 +92,7 @@ static int beyond_eof(struct inode *inode, loff_t bix)
  * so short names (len <= 9) don't even occupy the complete 32bit name
  * space.  A prime >256 ensures short names quickly spread the 32bit
  * name space.  Add about 26 for the estimated amount of information
- * of each character and pick a prime nearby, preferrably a bit-sparse
+ * of each character and pick a prime nearby, preferably a bit-sparse
  * one.
  */
 static u32 hash_32(const char *s, int len, u32 seed)
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index e86376b87af1..c2ad7028def4 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -196,7 +196,7 @@ long logfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		if (IS_RDONLY(inode))
 			return -EROFS;
 
-		if (!is_owner_or_cap(inode))
+		if (!inode_owner_or_capable(inode))
 			return -EACCES;
 
 		err = get_user(flags, (int __user *)arg);
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index 03b8c240aeda..edfea7a3a747 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -293,7 +293,7 @@ static int logfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 	return ret;
 }
 
-/* called with inode_lock held */
+/* called with inode->i_lock held */
 static int logfs_drop_inode(struct inode *inode)
 {
 	struct logfs_super *super = logfs_super(inode->i_sb);
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 3dcb3a60c331..d8d09380c7de 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -1616,7 +1616,7 @@ int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs,
 	err = logfs_write_buf(inode, page, flags);
 	if (!err && shrink_level(gc_level) == 0) {
 		/* Rewrite cannot mark the inode dirty but has to
-		 * write it immediatly.
+		 * write it immediately.
 		 * Q: Can't we just create an alias for the inode
 		 * instead? And if not, why not?
 		 */
diff --git a/fs/mbcache.c b/fs/mbcache.c
index a25444ab2baf..2f174be06555 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -542,7 +542,7 @@ __mb_cache_entry_find(struct list_head *l, struct list_head *head,
  * mb_cache_entry_find_first()
  *
  * Find the first cache entry on a given device with a certain key in
- * an additional index. Additonal matches can be found with
+ * an additional index. Additional matches can be found with
  * mb_cache_entry_find_next(). Returns NULL if no match was found. The
  * returned cache entry is locked for shared access ("multiple readers").
  *
diff --git a/fs/minix/Kconfig b/fs/minix/Kconfig
index 0fd7ca994264..6624684dd5de 100644
--- a/fs/minix/Kconfig
+++ b/fs/minix/Kconfig
@@ -15,3 +15,11 @@ config MINIX_FS
 	  module will be called minix.  Note that the file system of your root
 	  partition (the one containing the directory /) cannot be compiled as
 	  a module.
+
+config MINIX_FS_NATIVE_ENDIAN
+	def_bool MINIX_FS
+	depends on H8300 || M32R || MICROBLAZE || MIPS || S390 || SUPERH || SPARC || XTENSA || (M68K && !MMU)
+
+config MINIX_FS_BIG_ENDIAN_16BIT_INDEXED
+	def_bool MINIX_FS
+	depends on M68K && MMU
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index ae0b83f476a6..adcdc0a4e182 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -399,7 +399,6 @@ static sector_t minix_bmap(struct address_space *mapping, sector_t block)
 static const struct address_space_operations minix_aops = {
 	.readpage = minix_readpage,
 	.writepage = minix_writepage,
-	.sync_page = block_sync_page,
 	.write_begin = minix_write_begin,
 	.write_end = generic_write_end,
 	.bmap = minix_bmap
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 407b1c84911e..341e2122879a 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -88,4 +88,78 @@ static inline struct minix_inode_info *minix_i(struct inode *inode)
 	return list_entry(inode, struct minix_inode_info, vfs_inode);
 }
 
+#if defined(CONFIG_MINIX_FS_NATIVE_ENDIAN) && \
+	defined(CONFIG_MINIX_FS_BIG_ENDIAN_16BIT_INDEXED)
+
+#error Minix file system byte order broken
+
+#elif defined(CONFIG_MINIX_FS_NATIVE_ENDIAN)
+
+/*
+ * big-endian 32 or 64 bit indexed bitmaps on big-endian system or
+ * little-endian bitmaps on little-endian system
+ */
+
+#define minix_test_and_set_bit(nr, addr)	\
+	__test_and_set_bit((nr), (unsigned long *)(addr))
+#define minix_set_bit(nr, addr)	\
+	__set_bit((nr), (unsigned long *)(addr))
+#define minix_test_and_clear_bit(nr, addr)	\
+	__test_and_clear_bit((nr), (unsigned long *)(addr))
+#define minix_test_bit(nr, addr)	\
+	test_bit((nr), (unsigned long *)(addr))
+#define minix_find_first_zero_bit(addr, size)	\
+	find_first_zero_bit((unsigned long *)(addr), (size))
+
+#elif defined(CONFIG_MINIX_FS_BIG_ENDIAN_16BIT_INDEXED)
+
+/*
+ * big-endian 16bit indexed bitmaps
+ */
+
+static inline int minix_find_first_zero_bit(const void *vaddr, unsigned size)
+{
+	const unsigned short *p = vaddr, *addr = vaddr;
+	unsigned short num;
+
+	if (!size)
+		return 0;
+
+	size = (size >> 4) + ((size & 15) > 0);
+	while (*p++ == 0xffff) {
+		if (--size == 0)
+			return (p - addr) << 4;
+	}
+
+	num = *--p;
+	return ((p - addr) << 4) + ffz(num);
+}
+
+#define minix_test_and_set_bit(nr, addr)	\
+	__test_and_set_bit((nr) ^ 16, (unsigned long *)(addr))
+#define minix_set_bit(nr, addr)	\
+	__set_bit((nr) ^ 16, (unsigned long *)(addr))
+#define minix_test_and_clear_bit(nr, addr)	\
+	__test_and_clear_bit((nr) ^ 16, (unsigned long *)(addr))
+
+static inline int minix_test_bit(int nr, const void *vaddr)
+{
+	const unsigned short *p = vaddr;
+	return (p[nr >> 4] & (1U << (nr & 15))) != 0;
+}
+
+#else
+
+/*
+ * little-endian bitmaps
+ */
+
+#define minix_test_and_set_bit	__test_and_set_bit_le
+#define minix_set_bit		__set_bit_le
+#define minix_test_and_clear_bit	__test_and_clear_bit_le
+#define minix_test_bit	test_bit_le
+#define minix_find_first_zero_bit	find_first_zero_bit_le
+
+#endif
+
 #endif /* FS_MINIX_H */
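A gloss on the (nr) ^ 16 indexing above (my illustration, not from the patch): the on-disk bitmap is an array of little-bit-ordered 16-bit words, so bit nr lives at bit (nr & 15) of halfword (nr >> 4). A big-endian 32-bit load places the even-indexed halfword in bits 31..16 and the odd one in bits 15..0; flipping bit 4 of nr swaps even/odd halfword while staying in the same 32-bit word, which lands the generic word-sized bitops on the correct bit. A small userspace check of that equivalence:

#include <assert.h>

/* Bit position that halfword-bit (nr >> 4, nr & 15) occupies once two
 * consecutive 16-bit words are read as one big-endian 32-bit word. */
static unsigned pos_after_be32_load(unsigned nr)
{
	unsigned halfword = nr >> 4, bit = nr & 15;

	return (halfword & 1) ? bit : 16 + bit;
}

int main(void)
{
	unsigned nr;

	for (nr = 0; nr < 1024; nr++)
		assert(((nr ^ 16) & 31) == pos_after_be32_load(nr));
	return 0;
}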
diff --git a/fs/mpage.c b/fs/mpage.c
index d78455a81ec9..0afc809e46e0 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -364,6 +364,9 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
 	sector_t last_block_in_bio = 0;
 	struct buffer_head map_bh;
 	unsigned long first_logical_block = 0;
+	struct blk_plug plug;
+
+	blk_start_plug(&plug);
 
 	map_bh.b_state = 0;
 	map_bh.b_size = 0;
@@ -385,6 +388,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
 	BUG_ON(!list_empty(pages));
 	if (bio)
 		mpage_bio_submit(READ, bio);
+	blk_finish_plug(&plug);
 	return 0;
 }
 EXPORT_SYMBOL(mpage_readpages);
@@ -666,8 +670,11 @@ int
 mpage_writepages(struct address_space *mapping,
 		struct writeback_control *wbc, get_block_t get_block)
 {
+	struct blk_plug plug;
 	int ret;
 
+	blk_start_plug(&plug);
+
 	if (!get_block)
 		ret = generic_writepages(mapping, wbc);
 	else {
@@ -682,6 +689,7 @@ mpage_writepages(struct address_space *mapping,
 		if (mpd.bio)
 			mpage_bio_submit(WRITE, mpd.bio);
 	}
+	blk_finish_plug(&plug);
 	return ret;
 }
 EXPORT_SYMBOL(mpage_writepages);
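These mpage hunks adopt the on-stack plugging API from the same series that deletes .sync_page and generic_unplug_device() elsewhere in this diff: bios submitted between blk_start_plug() and blk_finish_plug() are batched on the task and flushed to the driver in one go. The bare pattern, as a sketch (hypothetical helper):

#include <linux/blkdev.h>
#include <linux/bio.h>

static void submit_read_batch(struct bio **bios, int nr)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);		/* queue bios on the task plug list */
	for (i = 0; i < nr; i++)
		submit_bio(READ, bios[i]);
	blk_finish_plug(&plug);		/* unplug: release the whole batch */
}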
diff --git a/fs/namei.c b/fs/namei.c
index 5a9a6c3094da..54fc993e3027 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -70,7 +70,7 @@
  * name indicated by the symlink. The old code always complained that the
  * name already exists, due to not following the symlink even if its target
  * is nonexistent.  The new semantics affects also mknod() and link() when
- * the name is a symlink pointing to a non-existant name.
+ * the name is a symlink pointing to a non-existent name.
  *
  * I don't know which semantics is the right one, since I have no access
  * to standards. But I found by trial that HP-UX 9.0 has the full "new"
@@ -183,6 +183,9 @@ static int acl_permission_check(struct inode *inode, int mask, unsigned int flag
 
 	mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
 
+	if (current_user_ns() != inode_userns(inode))
+		goto other_perms;
+
 	if (current_fsuid() == inode->i_uid)
 		mode >>= 6;
 	else {
@@ -196,6 +199,7 @@ static int acl_permission_check(struct inode *inode, int mask, unsigned int flag
 			mode >>= 3;
 	}
 
+other_perms:
 	/*
 	 * If the DACs are ok we don't need any capability check.
 	 */
@@ -237,7 +241,7 @@ int generic_permission(struct inode *inode, int mask, unsigned int flags,
 	 * Executable DACs are overridable if at least one exec bit is set.
 	 */
 	if (!(mask & MAY_EXEC) || execute_ok(inode))
-		if (capable(CAP_DAC_OVERRIDE))
+		if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE))
 			return 0;
 
 	/*
@@ -245,7 +249,7 @@ int generic_permission(struct inode *inode, int mask, unsigned int flags,
 	 */
 	mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
 	if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))
-		if (capable(CAP_DAC_READ_SEARCH))
+		if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH))
 			return 0;
 
 	return -EACCES;
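The two hunks above change the DAC-override question from "is the caller privileged in the initial namespace?" (capable()) to "is the caller privileged over the user namespace this inode belongs to?" (ns_capable() against inode_userns()). Condensed into one illustrative helper -- my sketch, not from the patch:

/* Illustrative only: for inodes in init_user_ns this still reduces to
 * the old global-root check. */
static bool may_override_dac(struct inode *inode)
{
	return ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE);
}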
@@ -654,6 +658,7 @@ static inline int handle_reval_path(struct nameidata *nd)
 static inline int exec_permission(struct inode *inode, unsigned int flags)
 {
 	int ret;
+	struct user_namespace *ns = inode_userns(inode);
 
 	if (inode->i_op->permission) {
 		ret = inode->i_op->permission(inode, MAY_EXEC, flags);
@@ -666,7 +671,8 @@ static inline int exec_permission(struct inode *inode, unsigned int flags)
 	if (ret == -ECHILD)
 		return ret;
 
-	if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH))
+	if (ns_capable(ns, CAP_DAC_OVERRIDE) ||
+			ns_capable(ns, CAP_DAC_READ_SEARCH))
 		goto ok;
 
 	return ret;
@@ -691,6 +697,7 @@ static __always_inline void set_root_rcu(struct nameidata *nd)
 		do {
 			seq = read_seqcount_begin(&fs->seq);
 			nd->root = fs->root;
+			nd->seq = __read_seqcount_begin(&nd->root.dentry->d_seq);
 		} while (read_seqcount_retry(&fs->seq, seq));
 	}
 }
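The added line samples the root dentry's d_seq inside the existing fs->seq retry loop, so a later rcu-walk can verify that the root it copied is still the one it started from. The surrounding construct is the standard read-side seqcount pattern; a generic sketch:

#include <linux/seqlock.h>

/* Sketch: snapshot two values consistently against concurrent writers. */
static void read_pair(seqcount_t *sc, const int *a, const int *b,
		      int *out_a, int *out_b)
{
	unsigned seq;

	do {
		seq = read_seqcount_begin(sc);
		*out_a = *a;
		*out_b = *b;
	} while (read_seqcount_retry(sc, seq));
}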
@@ -986,6 +993,12 @@ int follow_down_one(struct path *path)
 	return 0;
 }
 
+static inline bool managed_dentry_might_block(struct dentry *dentry)
+{
+	return (dentry->d_flags & DCACHE_MANAGE_TRANSIT &&
+		dentry->d_op->d_manage(dentry, true) < 0);
+}
+
 /*
  * Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we
  * meet a managed dentry and we're not walking to "..". True is returned to
@@ -994,19 +1007,26 @@ int follow_down_one(struct path *path)
 static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
 			       struct inode **inode, bool reverse_transit)
 {
-	while (d_mountpoint(path->dentry)) {
+	for (;;) {
 		struct vfsmount *mounted;
-		if (unlikely(path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) &&
-		    !reverse_transit &&
-		    path->dentry->d_op->d_manage(path->dentry, true) < 0)
+		/*
+		 * Don't forget we might have a non-mountpoint managed dentry
+		 * that wants to block transit.
+		 */
+		*inode = path->dentry->d_inode;
+		if (!reverse_transit &&
+		    unlikely(managed_dentry_might_block(path->dentry)))
 			return false;
+
+		if (!d_mountpoint(path->dentry))
+			break;
+
 		mounted = __lookup_mnt(path->mnt, path->dentry, 1);
 		if (!mounted)
 			break;
 		path->mnt = mounted;
 		path->dentry = mounted->mnt_root;
 		nd->seq = read_seqcount_begin(&path->dentry->d_seq);
-		*inode = path->dentry->d_inode;
 	}
 
 	if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
@@ -1644,13 +1664,16 @@ static int path_lookupat(int dfd, const char *name,
 		err = -ECHILD;
 	}
 
-	if (!err)
+	if (!err) {
 		err = handle_reval_path(nd);
+		if (err)
+			path_put(&nd->path);
+	}
 
 	if (!err && nd->flags & LOOKUP_DIRECTORY) {
 		if (!nd->inode->i_op->lookup) {
 			path_put(&nd->path);
-			return -ENOTDIR;
+			err = -ENOTDIR;
 		}
 	}
 
@@ -1842,11 +1865,15 @@ static inline int check_sticky(struct inode *dir, struct inode *inode)
 
 	if (!(dir->i_mode & S_ISVTX))
 		return 0;
+	if (current_user_ns() != inode_userns(inode))
+		goto other_userns;
 	if (inode->i_uid == fsuid)
 		return 0;
 	if (dir->i_uid == fsuid)
 		return 0;
-	return !capable(CAP_FOWNER);
+
+other_userns:
+	return !ns_capable(inode_userns(inode), CAP_FOWNER);
 }
 
 /*
@@ -2026,7 +2053,7 @@ static int may_open(struct path *path, int acc_mode, int flag)
 	}
 
 	/* O_NOATIME can only be set by the owner or superuser */
-	if (flag & O_NOATIME && !is_owner_or_cap(inode))
+	if (flag & O_NOATIME && !inode_owner_or_capable(inode))
 		return -EPERM;
 
 	/*
@@ -2440,7 +2467,8 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 	if (error)
 		return error;
 
-	if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
+	if ((S_ISCHR(mode) || S_ISBLK(mode)) &&
+	    !ns_capable(inode_userns(dir), CAP_MKNOD))
 		return -EPERM;
 
 	if (!dir->i_op->mknod)
diff --git a/fs/namespace.c b/fs/namespace.c
index 9263995bf6a1..d99bcf59e4c2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1030,18 +1030,6 @@ const struct seq_operations mounts_op = {
 	.show	= show_vfsmnt
 };
 
-static int uuid_is_nil(u8 *uuid)
-{
-	int i;
-	u8 *cp = (u8 *)uuid;
-
-	for (i = 0; i < 16; i++) {
-		if (*cp++)
-			return 0;
-	}
-	return 1;
-}
-
 static int show_mountinfo(struct seq_file *m, void *v)
 {
 	struct proc_mounts *p = m->private;
@@ -1085,10 +1073,6 @@ static int show_mountinfo(struct seq_file *m, void *v)
 	if (IS_MNT_UNBINDABLE(mnt))
 		seq_puts(m, " unbindable");
 
-	if (!uuid_is_nil(mnt->mnt_sb->s_uuid))
-		/* print the uuid */
-		seq_printf(m, " uuid:%pU", mnt->mnt_sb->s_uuid);
-
 	/* Filesystem specific data */
 	seq_puts(m, " - ");
 	show_type(m, sb);
@@ -2701,7 +2685,7 @@ void __init mnt_init(void)
 	if (!mount_hashtable)
 		panic("Failed to allocate mount hash table\n");
 
-	printk("Mount-cache hash table entries: %lu\n", HASH_SIZE);
+	printk(KERN_INFO "Mount-cache hash table entries: %lu\n", HASH_SIZE);
 
 	for (u = 0; u < HASH_SIZE; u++)
 		INIT_LIST_HEAD(&mount_hashtable[u]);
diff --git a/fs/ncpfs/Makefile b/fs/ncpfs/Makefile
index 68ea095100a8..c66af563f2ce 100644
--- a/fs/ncpfs/Makefile
+++ b/fs/ncpfs/Makefile
@@ -11,6 +11,6 @@ ncpfs-$(CONFIG_NCPFS_EXTRAS) += symlink.o
 ncpfs-$(CONFIG_NCPFS_NFS_NS)	+= symlink.o
 
 # If you want debugging output, please uncomment the following line
-# EXTRA_CFLAGS += -DDEBUG_NCP=1
+# ccflags-y := -DDEBUG_NCP=1
 
 CFLAGS_ncplib_kernel.o := -finline-functions
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 00a1d1c3d3a4..0250e4ce4893 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -596,7 +596,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
 /*	server->priv.data = NULL;	*/
 
 	server->m = data;
-	/* Althought anything producing this is buggy, it happens
+	/* Although anything producing this is buggy, it happens
 	   now because of PATH_MAX changes.. */
 	if (server->m.time_out < 1) {
 		server->m.time_out = 10;
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 14e0f9371d14..00ecf62ce7c1 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -241,7 +241,7 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
 
 	args->cbl_layout_type = ntohl(*p++);
 	/* Depite the spec's xdr, iomode really belongs in the FILE switch,
-	 * as it is unuseable and ignored with the other types.
+	 * as it is unusable and ignored with the other types.
 	 */
 	iomode = ntohl(*p++);
 	args->cbl_layoutchanged = ntohl(*p++);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index abdf38d5971d..7237672216c8 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -44,6 +44,7 @@
 /* #define NFS_DEBUG_VERBOSE 1 */
 
 static int nfs_opendir(struct inode *, struct file *);
+static int nfs_closedir(struct inode *, struct file *);
 static int nfs_readdir(struct file *, void *, filldir_t);
 static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *);
 static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *);
@@ -64,7 +65,7 @@ const struct file_operations nfs_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= nfs_readdir,
 	.open		= nfs_opendir,
-	.release	= nfs_release,
+	.release	= nfs_closedir,
 	.fsync		= nfs_fsync_dir,
 };
 
@@ -133,13 +134,35 @@ const struct inode_operations nfs4_dir_inode_operations = {
 
 #endif /* CONFIG_NFS_V4 */
 
+static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct rpc_cred *cred)
+{
+	struct nfs_open_dir_context *ctx;
+	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+	if (ctx != NULL) {
+		ctx->duped = 0;
+		ctx->dir_cookie = 0;
+		ctx->dup_cookie = 0;
+		ctx->cred = get_rpccred(cred);
+	} else
+		ctx = ERR_PTR(-ENOMEM);
+	return ctx;
+}
+
+static void put_nfs_open_dir_context(struct nfs_open_dir_context *ctx)
+{
+	put_rpccred(ctx->cred);
+	kfree(ctx);
+}
+
 /*
  * Open file
  */
 static int
 nfs_opendir(struct inode *inode, struct file *filp)
 {
-	int res;
+	int res = 0;
+	struct nfs_open_dir_context *ctx;
+	struct rpc_cred *cred;
 
 	dfprintk(FILE, "NFS: open dir(%s/%s)\n",
 			filp->f_path.dentry->d_parent->d_name.name,
@@ -147,8 +170,15 @@ nfs_opendir(struct inode *inode, struct file *filp)
 
 	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
 
-	/* Call generic open code in order to cache credentials */
-	res = nfs_open(inode, filp);
+	cred = rpc_lookup_cred();
+	if (IS_ERR(cred))
+		return PTR_ERR(cred);
+	ctx = alloc_nfs_open_dir_context(cred);
+	if (IS_ERR(ctx)) {
+		res = PTR_ERR(ctx);
+		goto out;
+	}
+	filp->private_data = ctx;
 	if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) {
 		/* This is a mountpoint, so d_revalidate will never
 		 * have been called, so we need to refresh the
@@ -156,9 +186,18 @@ nfs_opendir(struct inode *inode, struct file *filp)
 		 */
 		__nfs_revalidate_inode(NFS_SERVER(inode), inode);
 	}
+out:
+	put_rpccred(cred);
 	return res;
 }
 
+static int
+nfs_closedir(struct inode *inode, struct file *filp)
+{
+	put_nfs_open_dir_context(filp->private_data);
+	return 0;
+}
+
 struct nfs_cache_array_entry {
 	u64 cookie;
 	u64 ino;
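The new nfs_open_dir_context moves readdir cookie state and the credential into per-open state hung off filp->private_data, allocated in .open and freed in .release, so two opens of the same directory no longer share cookies through the inode's shared open context. The generic shape of that VFS pattern, as a sketch (hypothetical my_* names):

#include <linux/fs.h>
#include <linux/slab.h>

struct my_dir_ctx {
	u64 dir_cookie;		/* per-open readdir position */
};

static int my_opendir(struct inode *inode, struct file *filp)
{
	struct my_dir_ctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);

	if (!ctx)
		return -ENOMEM;
	filp->private_data = ctx;
	return 0;
}

static int my_closedir(struct inode *inode, struct file *filp)
{
	kfree(filp->private_data);	/* paired with ->open allocation */
	return 0;
}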
@@ -284,19 +323,20 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri
 {
 	loff_t diff = desc->file->f_pos - desc->current_index;
 	unsigned int index;
+	struct nfs_open_dir_context *ctx = desc->file->private_data;
 
 	if (diff < 0)
 		goto out_eof;
 	if (diff >= array->size) {
 		if (array->eof_index >= 0)
 			goto out_eof;
-		desc->current_index += array->size;
 		return -EAGAIN;
 	}
 
 	index = (unsigned int)diff;
 	*desc->dir_cookie = array->array[index].cookie;
 	desc->cache_entry_index = index;
+	ctx->duped = 0;
 	return 0;
 out_eof:
 	desc->eof = 1;
@@ -307,10 +347,18 @@ static
 int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
 {
 	int i;
+	loff_t new_pos;
 	int status = -EAGAIN;
+	struct nfs_open_dir_context *ctx = desc->file->private_data;
 
 	for (i = 0; i < array->size; i++) {
 		if (array->array[i].cookie == *desc->dir_cookie) {
+			new_pos = desc->current_index + i;
+			if (new_pos < desc->file->f_pos) {
+				ctx->dup_cookie = *desc->dir_cookie;
+				ctx->duped = 1;
+			}
+			desc->file->f_pos = new_pos;
 			desc->cache_entry_index = i;
 			return 0;
 		}
@@ -342,6 +390,7 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
 
 	if (status == -EAGAIN) {
 		desc->last_cookie = array->last_cookie;
+		desc->current_index += array->size;
 		desc->page_index++;
 	}
 	nfs_readdir_release_array(desc->page);
@@ -354,7 +403,8 @@ static
 int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
 			struct nfs_entry *entry, struct file *file, struct inode *inode)
 {
-	struct rpc_cred *cred = nfs_file_cred(file);
+	struct nfs_open_dir_context *ctx = file->private_data;
+	struct rpc_cred *cred = ctx->cred;
 	unsigned long timestamp, gencount;
 	int error;
 
@@ -693,6 +743,20 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
 	int i = 0;
 	int res = 0;
 	struct nfs_cache_array *array = NULL;
+	struct nfs_open_dir_context *ctx = file->private_data;
+
+	if (ctx->duped != 0 && ctx->dup_cookie == *desc->dir_cookie) {
+		if (printk_ratelimit()) {
+			pr_notice("NFS: directory %s/%s contains a readdir loop. "
+				"Please contact your server vendor. "
+				"Offending cookie: %llu\n",
+				file->f_dentry->d_parent->d_name.name,
+				file->f_dentry->d_name.name,
+				*desc->dir_cookie);
+		}
+		res = -ELOOP;
+		goto out;
+	}
 
 	array = nfs_readdir_get_array(desc->page);
 	if (IS_ERR(array)) {
@@ -785,6 +849,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	struct inode	*inode = dentry->d_inode;
 	nfs_readdir_descriptor_t my_desc,
 			*desc = &my_desc;
+	struct nfs_open_dir_context *dir_ctx = filp->private_data;
 	int res;
 
 	dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n",
@@ -801,7 +866,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	memset(desc, 0, sizeof(*desc));
 
 	desc->file = filp;
-	desc->dir_cookie = &nfs_file_open_context(filp)->dir_cookie;
+	desc->dir_cookie = &dir_ctx->dir_cookie;
 	desc->decode = NFS_PROTO(inode)->decode_dirent;
 	desc->plus = NFS_USE_READDIRPLUS(inode);
 
@@ -853,6 +918,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
 {
 	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
+	struct nfs_open_dir_context *dir_ctx = filp->private_data;
 
 	dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n",
 			dentry->d_parent->d_name.name,
@@ -872,7 +938,8 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
 	}
 	if (offset != filp->f_pos) {
 		filp->f_pos = offset;
-		nfs_file_open_context(filp)->dir_cookie = 0;
+		dir_ctx->dir_cookie = 0;
+		dir_ctx->duped = 0;
 	}
 out:
 	mutex_unlock(&inode->i_mutex);
@@ -1068,7 +1135,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
 	if (fhandle == NULL || fattr == NULL)
 		goto out_error;
 
-	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
+	error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr);
 	if (error)
 		goto out_bad;
 	if (nfs_compare_fh(NFS_FH(inode), fhandle))
@@ -1224,7 +1291,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 	parent = dentry->d_parent;
 	/* Protect against concurrent sillydeletes */
 	nfs_block_sillyrename(parent);
-	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
+	error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr);
 	if (error == -ENOENT)
 		goto no_entry;
 	if (error < 0) {
@@ -1562,7 +1629,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
 	if (dentry->d_inode)
 		goto out;
 	if (fhandle->size == 0) {
-		error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
+		error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr);
 		if (error)
 			goto out_error;
 	}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index d85a534b15cd..2f093ed16980 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -301,7 +301,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
  * disk, but it retrieves and clears ctx->error after synching, despite
  * the two being set at the same time in nfs_context_set_write_error().
  * This is because the former is used to notify the _next_ call to
- * nfs_file_write() that a write error occured, and hence cause it to
+ * nfs_file_write() that a write error occurred, and hence cause it to
  * fall back to doing a synchronous write.
  */
 static int
@@ -326,6 +326,9 @@ nfs_file_fsync(struct file *file, int datasync)
 	ret = xchg(&ctx->error, 0);
 	if (!ret && status < 0)
 		ret = status;
+	if (!ret && !datasync)
+		/* application has asked for meta-data sync */
+		ret = pnfs_layoutcommit_inode(inode, true);
 	return ret;
 }
 
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 1084792bc0fe..dcb61548887f 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -222,6 +222,10 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh,
 		goto out;
 	}
 
+	if (fattr->valid & NFS_ATTR_FATTR_FSID &&
+	    !nfs_fsid_equal(&server->fsid, &fattr->fsid))
+		memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
+
 	inode = nfs_fhget(sb, mntfh, fattr);
 	if (IS_ERR(inode)) {
 		dprintk("nfs_get_root: get root inode failed\n");
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 01768e5e2c9b..57bb31ad7a5e 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -254,7 +254,9 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 	struct inode *inode = ERR_PTR(-ENOENT);
 	unsigned long hash;
 
-	if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0)
+	nfs_attr_check_mountpoint(sb, fattr);
+
+	if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0 && (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT) == 0)
 		goto out_no_inode;
 	if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0)
 		goto out_no_inode;
@@ -298,8 +300,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 		if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS))
 			set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
 		/* Deal with crossing mountpoints */
-		if ((fattr->valid & NFS_ATTR_FATTR_FSID)
-				&& !nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) {
+		if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT ||
+				fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
 			if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
 				inode->i_op = &nfs_referral_inode_operations;
 			else
@@ -639,7 +641,6 @@ struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cr
 	ctx->mode = f_mode;
 	ctx->flags = 0;
 	ctx->error = 0;
-	ctx->dir_cookie = 0;
 	nfs_init_lock_context(&ctx->lock_context);
 	ctx->lock_context.open_context = ctx;
 	INIT_LIST_HEAD(&ctx->list);
@@ -1471,6 +1472,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
1471 nfsi->delegation_state = 0; 1472 nfsi->delegation_state = 0;
1472 init_rwsem(&nfsi->rwsem); 1473 init_rwsem(&nfsi->rwsem);
1473 nfsi->layout = NULL; 1474 nfsi->layout = NULL;
1475 atomic_set(&nfsi->commits_outstanding, 0);
1474#endif 1476#endif
1475} 1477}
1476 1478
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 72e0bddf7a2f..ce118ce885dd 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -39,6 +39,12 @@ static inline int nfs4_has_persistent_session(const struct nfs_client *clp)
39 return 0; 39 return 0;
40} 40}
41 41
42static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct nfs_fattr *fattr)
43{
44 if (!nfs_fsid_equal(&NFS_SB(parent)->fsid, &fattr->fsid))
45 fattr->valid |= NFS_ATTR_FATTR_MOUNTPOINT;
46}
47
42struct nfs_clone_mount { 48struct nfs_clone_mount {
43 const struct super_block *sb; 49 const struct super_block *sb;
44 const struct dentry *dentry; 50 const struct dentry *dentry;
@@ -214,6 +220,7 @@ extern const u32 nfs41_maxwrite_overhead;
214/* nfs4proc.c */ 220/* nfs4proc.c */
215#ifdef CONFIG_NFS_V4 221#ifdef CONFIG_NFS_V4
216extern struct rpc_procinfo nfs4_procedures[]; 222extern struct rpc_procinfo nfs4_procedures[];
223void nfs_fixup_secinfo_attributes(struct nfs_fattr *, struct nfs_fh *);
217#endif 224#endif
218 225
219extern int nfs4_init_ds_session(struct nfs_client *clp); 226extern int nfs4_init_ds_session(struct nfs_client *clp);
@@ -276,11 +283,25 @@ extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
276extern void nfs_read_prepare(struct rpc_task *task, void *calldata); 283extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
277 284
278/* write.c */ 285/* write.c */
286extern void nfs_commit_free(struct nfs_write_data *p);
279extern int nfs_initiate_write(struct nfs_write_data *data, 287extern int nfs_initiate_write(struct nfs_write_data *data,
280 struct rpc_clnt *clnt, 288 struct rpc_clnt *clnt,
281 const struct rpc_call_ops *call_ops, 289 const struct rpc_call_ops *call_ops,
282 int how); 290 int how);
283extern void nfs_write_prepare(struct rpc_task *task, void *calldata); 291extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
292extern int nfs_initiate_commit(struct nfs_write_data *data,
293 struct rpc_clnt *clnt,
294 const struct rpc_call_ops *call_ops,
295 int how);
296extern void nfs_init_commit(struct nfs_write_data *data,
297 struct list_head *head,
298 struct pnfs_layout_segment *lseg);
299void nfs_retry_commit(struct list_head *page_list,
300 struct pnfs_layout_segment *lseg);
301void nfs_commit_clear_lock(struct nfs_inode *nfsi);
302void nfs_commitdata_release(void *data);
303void nfs_commit_release_pages(struct nfs_write_data *data);
304
284#ifdef CONFIG_MIGRATION 305#ifdef CONFIG_MIGRATION
285extern int nfs_migrate_page(struct address_space *, 306extern int nfs_migrate_page(struct address_space *,
286 struct page *, struct page *); 307 struct page *, struct page *);
@@ -296,12 +317,14 @@ extern int nfs4_init_client(struct nfs_client *clp,
296 rpc_authflavor_t authflavour, 317 rpc_authflavor_t authflavour,
297 int noresvport); 318 int noresvport);
298extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data); 319extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data);
299extern int _nfs4_call_sync(struct nfs_server *server, 320extern int _nfs4_call_sync(struct rpc_clnt *clnt,
321 struct nfs_server *server,
300 struct rpc_message *msg, 322 struct rpc_message *msg,
301 struct nfs4_sequence_args *args, 323 struct nfs4_sequence_args *args,
302 struct nfs4_sequence_res *res, 324 struct nfs4_sequence_res *res,
303 int cache_reply); 325 int cache_reply);
304extern int _nfs4_call_sync_session(struct nfs_server *server, 326extern int _nfs4_call_sync_session(struct rpc_clnt *clnt,
327 struct nfs_server *server,
305 struct rpc_message *msg, 328 struct rpc_message *msg,
306 struct nfs4_sequence_args *args, 329 struct nfs4_sequence_args *args,
307 struct nfs4_sequence_res *res, 330 struct nfs4_sequence_res *res,
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index c0b8344db0c6..89fc160fd5b0 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -15,6 +15,7 @@
15#include <linux/string.h> 15#include <linux/string.h>
16#include <linux/sunrpc/clnt.h> 16#include <linux/sunrpc/clnt.h>
17#include <linux/vfs.h> 17#include <linux/vfs.h>
18#include <linux/sunrpc/gss_api.h>
18#include "internal.h" 19#include "internal.h"
19 20
20#define NFSDBG_FACILITY NFSDBG_VFS 21#define NFSDBG_FACILITY NFSDBG_VFS
@@ -27,7 +28,8 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
27 28
28static struct vfsmount *nfs_do_submount(struct dentry *dentry, 29static struct vfsmount *nfs_do_submount(struct dentry *dentry,
29 struct nfs_fh *fh, 30 struct nfs_fh *fh,
30 struct nfs_fattr *fattr); 31 struct nfs_fattr *fattr,
32 rpc_authflavor_t authflavor);
31 33
32/* 34/*
33 * nfs_path - reconstruct the path given an arbitrary dentry 35 * nfs_path - reconstruct the path given an arbitrary dentry
@@ -98,7 +100,7 @@ rename_retry:
98 namelen--; 100 namelen--;
99 buflen -= namelen; 101 buflen -= namelen;
100 if (buflen < 0) { 102 if (buflen < 0) {
101 spin_lock(&dentry->d_lock); 103 spin_unlock(&dentry->d_lock);
102 rcu_read_unlock(); 104 rcu_read_unlock();
103 goto Elong; 105 goto Elong;
104 } 106 }
@@ -108,7 +110,7 @@ rename_retry:
108 rcu_read_unlock(); 110 rcu_read_unlock();
109 return end; 111 return end;
110Elong_unlock: 112Elong_unlock:
111 spin_lock(&dentry->d_lock); 113 spin_unlock(&dentry->d_lock);
112 rcu_read_unlock(); 114 rcu_read_unlock();
113 if (read_seqretry(&rename_lock, seq)) 115 if (read_seqretry(&rename_lock, seq))
114 goto rename_retry; 116 goto rename_retry;
@@ -116,6 +118,99 @@ Elong:
116 return ERR_PTR(-ENAMETOOLONG); 118 return ERR_PTR(-ENAMETOOLONG);
117} 119}
118 120
121#ifdef CONFIG_NFS_V4
122static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors, struct inode *inode)
123{
124 struct gss_api_mech *mech;
125 struct xdr_netobj oid;
126 int i;
127 rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX;
128
129 for (i = 0; i < flavors->num_flavors; i++) {
130 struct nfs4_secinfo_flavor *flavor;
131 flavor = &flavors->flavors[i];
132
133 if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) {
134 pseudoflavor = flavor->flavor;
135 break;
136 } else if (flavor->flavor == RPC_AUTH_GSS) {
137 oid.len = flavor->gss.sec_oid4.len;
138 oid.data = flavor->gss.sec_oid4.data;
139 mech = gss_mech_get_by_OID(&oid);
140 if (!mech)
141 continue;
142 pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service);
143 gss_mech_put(mech);
144 break;
145 }
146 }
147
148 return pseudoflavor;
149}
150
151static int nfs_negotiate_security(const struct dentry *parent,
152 const struct dentry *dentry,
153 rpc_authflavor_t *flavor)
154{
155 struct page *page;
156 struct nfs4_secinfo_flavors *flavors;
157 int (*secinfo)(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
158 int ret = -EPERM;
159
160 secinfo = NFS_PROTO(parent->d_inode)->secinfo;
161 if (secinfo != NULL) {
162 page = alloc_page(GFP_KERNEL);
163 if (!page) {
164 ret = -ENOMEM;
165 goto out;
166 }
167 flavors = page_address(page);
168 ret = secinfo(parent->d_inode, &dentry->d_name, flavors);
169 *flavor = nfs_find_best_sec(flavors, dentry->d_inode);
170 put_page(page);
171 }
172
173out:
174 return ret;
175}
176
177static int nfs_lookup_with_sec(struct nfs_server *server, struct dentry *parent,
178 struct dentry *dentry, struct path *path,
179 struct nfs_fh *fh, struct nfs_fattr *fattr,
180 rpc_authflavor_t *flavor)
181{
182 struct rpc_clnt *clone;
183 struct rpc_auth *auth;
184 int err;
185
186 err = nfs_negotiate_security(parent, path->dentry, flavor);
187 if (err < 0)
188 goto out;
189 clone = rpc_clone_client(server->client);
190 auth = rpcauth_create(*flavor, clone);
191 if (!auth) {
192 err = -EIO;
193 goto out_shutdown;
194 }
195 err = server->nfs_client->rpc_ops->lookup(clone, parent->d_inode,
196 &path->dentry->d_name,
197 fh, fattr);
198out_shutdown:
199 rpc_shutdown_client(clone);
200out:
201 return err;
202}
203#else /* CONFIG_NFS_V4 */
204static inline int nfs_lookup_with_sec(struct nfs_server *server,
205 struct dentry *parent, struct dentry *dentry,
206 struct path *path, struct nfs_fh *fh,
207 struct nfs_fattr *fattr,
208 rpc_authflavor_t *flavor)
209{
210 return -EPERM;
211}
212#endif /* CONFIG_NFS_V4 */
213
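The negotiation added above prefers the cheapest flavor the server offers: AUTH_NULL or AUTH_UNIX wins outright, otherwise the first GSS mechanism the client can resolve. A self-contained sketch of that preference rule, with the OID lookup stubbed out (resolve_gss_pseudoflavor() is hypothetical; the kernel calls gss_mech_get_by_OID() and gss_svc_to_pseudoflavor()):

    #include <stdio.h>

    enum { RPC_AUTH_NULL = 0, RPC_AUTH_UNIX = 1, RPC_AUTH_GSS = 6 };

    /* Stand-in for the GSS mechanism lookup; 0 means "unknown mech". */
    static int resolve_gss_pseudoflavor(int known)
    {
            return known ? 390003 /* e.g. krb5 */ : 0;
    }

    static int find_best_sec(const int *flavors, const int *gss_known, int n)
    {
            int pseudoflavor = RPC_AUTH_UNIX;   /* default, as in the patch */

            for (int i = 0; i < n; i++) {
                    if (flavors[i] == RPC_AUTH_NULL ||
                        flavors[i] == RPC_AUTH_UNIX) {
                            pseudoflavor = flavors[i];  /* cheapest wins */
                            break;
                    } else if (flavors[i] == RPC_AUTH_GSS) {
                            int pf = resolve_gss_pseudoflavor(gss_known[i]);
                            if (!pf)
                                    continue;   /* unknown mech: keep looking */
                            pseudoflavor = pf;
                            break;
                    }
            }
            return pseudoflavor;
    }

    int main(void)
    {
            int server_order[] = { RPC_AUTH_GSS, RPC_AUTH_UNIX };
            int gss_known[]    = { 0, 0 };      /* client lacks the GSS mech */

            printf("negotiated pseudoflavor: %d\n",
                   find_best_sec(server_order, gss_known, 2)); /* 1 = AUTH_UNIX */
            return 0;
    }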
119/* 214/*
120 * nfs_d_automount - Handle crossing a mountpoint on the server 215 * nfs_d_automount - Handle crossing a mountpoint on the server
121 * @path - The mountpoint 216 * @path - The mountpoint
@@ -136,6 +231,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
136 struct nfs_fh *fh = NULL; 231 struct nfs_fh *fh = NULL;
137 struct nfs_fattr *fattr = NULL; 232 struct nfs_fattr *fattr = NULL;
138 int err; 233 int err;
234 rpc_authflavor_t flavor = RPC_AUTH_UNIX;
139 235
140 dprintk("--> nfs_d_automount()\n"); 236 dprintk("--> nfs_d_automount()\n");
141 237
@@ -153,9 +249,11 @@ struct vfsmount *nfs_d_automount(struct path *path)
153 249
154 /* Look it up again to get its attributes */ 250 /* Look it up again to get its attributes */
155 parent = dget_parent(path->dentry); 251 parent = dget_parent(path->dentry);
156 err = server->nfs_client->rpc_ops->lookup(parent->d_inode, 252 err = server->nfs_client->rpc_ops->lookup(server->client, parent->d_inode,
157 &path->dentry->d_name, 253 &path->dentry->d_name,
158 fh, fattr); 254 fh, fattr);
255 if (err == -EPERM && NFS_PROTO(parent->d_inode)->secinfo != NULL)
256 err = nfs_lookup_with_sec(server, parent, path->dentry, path, fh, fattr, &flavor);
159 dput(parent); 257 dput(parent);
160 if (err != 0) { 258 if (err != 0) {
161 mnt = ERR_PTR(err); 259 mnt = ERR_PTR(err);
@@ -165,7 +263,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
165 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) 263 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
166 mnt = nfs_do_refmount(path->dentry); 264 mnt = nfs_do_refmount(path->dentry);
167 else 265 else
168 mnt = nfs_do_submount(path->dentry, fh, fattr); 266 mnt = nfs_do_submount(path->dentry, fh, fattr, flavor);
169 if (IS_ERR(mnt)) 267 if (IS_ERR(mnt))
170 goto out; 268 goto out;
171 269
@@ -232,17 +330,20 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
232 * @dentry - parent directory 330 * @dentry - parent directory
233 * @fh - filehandle for new root dentry 331 * @fh - filehandle for new root dentry
234 * @fattr - attributes for new root inode 332 * @fattr - attributes for new root inode
333 * @authflavor - security flavor to use when performing the mount
235 * 334 *
236 */ 335 */
237static struct vfsmount *nfs_do_submount(struct dentry *dentry, 336static struct vfsmount *nfs_do_submount(struct dentry *dentry,
238 struct nfs_fh *fh, 337 struct nfs_fh *fh,
239 struct nfs_fattr *fattr) 338 struct nfs_fattr *fattr,
339 rpc_authflavor_t authflavor)
240{ 340{
241 struct nfs_clone_mount mountdata = { 341 struct nfs_clone_mount mountdata = {
242 .sb = dentry->d_sb, 342 .sb = dentry->d_sb,
243 .dentry = dentry, 343 .dentry = dentry,
244 .fh = fh, 344 .fh = fh,
245 .fattr = fattr, 345 .fattr = fattr,
346 .authflavor = authflavor,
246 }; 347 };
247 struct vfsmount *mnt = ERR_PTR(-ENOMEM); 348 struct vfsmount *mnt = ERR_PTR(-ENOMEM);
248 char *page = (char *) __get_free_page(GFP_USER); 349 char *page = (char *) __get_free_page(GFP_USER);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index d0c80d8b3f96..38053d823eb0 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -141,7 +141,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
141} 141}
142 142
143static int 143static int
144nfs3_proc_lookup(struct inode *dir, struct qstr *name, 144nfs3_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name,
145 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 145 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
146{ 146{
147 struct nfs3_diropargs arg = { 147 struct nfs3_diropargs arg = {
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index c64be1cff080..e1c261ddd65d 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -57,7 +57,8 @@ enum nfs4_session_state {
57struct nfs4_minor_version_ops { 57struct nfs4_minor_version_ops {
58 u32 minor_version; 58 u32 minor_version;
59 59
60 int (*call_sync)(struct nfs_server *server, 60 int (*call_sync)(struct rpc_clnt *clnt,
61 struct nfs_server *server,
61 struct rpc_message *msg, 62 struct rpc_message *msg,
62 struct nfs4_sequence_args *args, 63 struct nfs4_sequence_args *args,
63 struct nfs4_sequence_res *res, 64 struct nfs4_sequence_res *res,
@@ -262,6 +263,8 @@ extern int nfs4_proc_destroy_session(struct nfs4_session *);
262extern int nfs4_init_session(struct nfs_server *server); 263extern int nfs4_init_session(struct nfs_server *server);
263extern int nfs4_proc_get_lease_time(struct nfs_client *clp, 264extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
264 struct nfs_fsinfo *fsinfo); 265 struct nfs_fsinfo *fsinfo);
266extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data,
267 bool sync);
265 268
266static inline bool 269static inline bool
267is_ds_only_client(struct nfs_client *clp) 270is_ds_only_client(struct nfs_client *clp)
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 428558464817..6f8192f4cfc7 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -154,6 +154,23 @@ static int filelayout_read_done_cb(struct rpc_task *task,
154} 154}
155 155
156/* 156/*
157 * We reference the rpc_cred of the first WRITE that triggers the need for
158 * a LAYOUTCOMMIT, and use it to send the layoutcommit compound.
159 * rfc5661 is not clear about which credential should be used.
160 */
161static void
162filelayout_set_layoutcommit(struct nfs_write_data *wdata)
163{
164 if (FILELAYOUT_LSEG(wdata->lseg)->commit_through_mds ||
165 wdata->res.verf->committed == NFS_FILE_SYNC)
166 return;
167
168 pnfs_set_layoutcommit(wdata);
 169 dprintk("%s inode %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino,
170 (unsigned long) wdata->lseg->pls_end_pos);
171}
172
173/*
157 * Call ops for the async read/write cases 174 * Call ops for the async read/write cases
158 * In the case of dense layouts, the offset needs to be reset to its 175 * In the case of dense layouts, the offset needs to be reset to its
159 * original value. 176 * original value.
@@ -210,6 +227,38 @@ static int filelayout_write_done_cb(struct rpc_task *task,
210 return -EAGAIN; 227 return -EAGAIN;
211 } 228 }
212 229
230 filelayout_set_layoutcommit(data);
231 return 0;
232}
233
234/* Fake up some data that will cause nfs_commit_release to retry the writes. */
235static void prepare_to_resend_writes(struct nfs_write_data *data)
236{
237 struct nfs_page *first = nfs_list_entry(data->pages.next);
238
239 data->task.tk_status = 0;
240 memcpy(data->verf.verifier, first->wb_verf.verifier,
241 sizeof(first->wb_verf.verifier));
242 data->verf.verifier[0]++; /* ensure verifier mismatch */
243}
244
245static int filelayout_commit_done_cb(struct rpc_task *task,
246 struct nfs_write_data *data)
247{
248 int reset = 0;
249
250 if (filelayout_async_handle_error(task, data->args.context->state,
251 data->ds_clp, &reset) == -EAGAIN) {
252 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
253 __func__, data->ds_clp, data->ds_clp->cl_session);
254 if (reset) {
255 prepare_to_resend_writes(data);
256 filelayout_set_lo_fail(data->lseg);
257 } else
258 nfs_restart_rpc(task, data->ds_clp);
259 return -EAGAIN;
260 }
261
213 return 0; 262 return 0;
214} 263}
215 264
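prepare_to_resend_writes() relies on commit completion comparing each page's saved write verifier against the one returned by COMMIT; bumping a single byte guarantees the comparison fails and the pages are redirtied and rewritten. A toy illustration of that comparison (ordinary userspace C, not kernel code):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            unsigned char page_verf[8]   = {1, 2, 3, 4, 5, 6, 7, 8};
            unsigned char commit_verf[8];

            memcpy(commit_verf, page_verf, sizeof(commit_verf));
            commit_verf[0]++;               /* ensure verifier mismatch */

            /* A mismatch means the server may have lost the unstable write. */
            if (memcmp(page_verf, commit_verf, sizeof(page_verf)) != 0)
                    puts("verifier mismatch -> rewrite the page");
            return 0;
    }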
@@ -240,6 +289,16 @@ static void filelayout_write_release(void *data)
240 wdata->mds_ops->rpc_release(data); 289 wdata->mds_ops->rpc_release(data);
241} 290}
242 291
292static void filelayout_commit_release(void *data)
293{
294 struct nfs_write_data *wdata = (struct nfs_write_data *)data;
295
296 nfs_commit_release_pages(wdata);
297 if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding))
298 nfs_commit_clear_lock(NFS_I(wdata->inode));
299 nfs_commitdata_release(wdata);
300}
301
243struct rpc_call_ops filelayout_read_call_ops = { 302struct rpc_call_ops filelayout_read_call_ops = {
244 .rpc_call_prepare = filelayout_read_prepare, 303 .rpc_call_prepare = filelayout_read_prepare,
245 .rpc_call_done = filelayout_read_call_done, 304 .rpc_call_done = filelayout_read_call_done,
@@ -252,6 +311,12 @@ struct rpc_call_ops filelayout_write_call_ops = {
252 .rpc_release = filelayout_write_release, 311 .rpc_release = filelayout_write_release,
253}; 312};
254 313
314struct rpc_call_ops filelayout_commit_call_ops = {
315 .rpc_call_prepare = filelayout_write_prepare,
316 .rpc_call_done = filelayout_write_call_done,
317 .rpc_release = filelayout_commit_release,
318};
319
255static enum pnfs_try_status 320static enum pnfs_try_status
256filelayout_read_pagelist(struct nfs_read_data *data) 321filelayout_read_pagelist(struct nfs_read_data *data)
257{ 322{
@@ -320,10 +385,6 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
320 data->inode->i_ino, sync, (size_t) data->args.count, offset, 385 data->inode->i_ino, sync, (size_t) data->args.count, offset,
321 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); 386 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
322 387
323 /* We can't handle commit to ds yet */
324 if (!FILELAYOUT_LSEG(lseg)->commit_through_mds)
325 data->args.stable = NFS_FILE_SYNC;
326
327 data->write_done_cb = filelayout_write_done_cb; 388 data->write_done_cb = filelayout_write_done_cb;
328 data->ds_clp = ds->ds_clp; 389 data->ds_clp = ds->ds_clp;
329 fh = nfs4_fl_select_ds_fh(lseg, j); 390 fh = nfs4_fl_select_ds_fh(lseg, j);
@@ -441,12 +502,33 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
441 struct nfs4_layoutget_res *lgr, 502 struct nfs4_layoutget_res *lgr,
442 struct nfs4_deviceid *id) 503 struct nfs4_deviceid *id)
443{ 504{
444 uint32_t *p = (uint32_t *)lgr->layout.buf; 505 struct xdr_stream stream;
506 struct xdr_buf buf = {
507 .pages = lgr->layoutp->pages,
508 .page_len = lgr->layoutp->len,
509 .buflen = lgr->layoutp->len,
510 .len = lgr->layoutp->len,
511 };
512 struct page *scratch;
513 __be32 *p;
445 uint32_t nfl_util; 514 uint32_t nfl_util;
446 int i; 515 int i;
447 516
448 dprintk("%s: set_layout_map Begin\n", __func__); 517 dprintk("%s: set_layout_map Begin\n", __func__);
449 518
519 scratch = alloc_page(GFP_KERNEL);
520 if (!scratch)
521 return -ENOMEM;
522
523 xdr_init_decode(&stream, &buf, NULL);
524 xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
525
 526 /* 20 = nfl_util (4), first_stripe_index (4), pattern_offset (8),
527 * num_fh (4) */
528 p = xdr_inline_decode(&stream, NFS4_DEVICEID4_SIZE + 20);
529 if (unlikely(!p))
530 goto out_err;
531
450 memcpy(id, p, sizeof(*id)); 532 memcpy(id, p, sizeof(*id));
451 p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); 533 p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
452 print_deviceid(id); 534 print_deviceid(id);
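The decode rework here replaces raw pointer arithmetic over a vmap'ed buffer with xdr_inline_decode(), which fails cleanly when a reply is shorter than it claims. A minimal userspace sketch of that bounded-decode pattern, assuming a simple cursor in place of struct xdr_stream:

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct xdr_cursor { const uint8_t *p, *end; };

    static const uint8_t *inline_decode(struct xdr_cursor *c, size_t nbytes)
    {
            size_t quads = (nbytes + 3) & ~3u;      /* XDR pads to 4 bytes */

            if ((size_t)(c->end - c->p) < quads)
                    return NULL;                    /* short buffer: fail */
            const uint8_t *ret = c->p;
            c->p += quads;
            return ret;
    }

    int main(void)
    {
            uint8_t buf[8];
            uint32_t v = htonl(42), word;

            memcpy(buf, &v, 4);
            struct xdr_cursor c = { buf, buf + 4 };

            const uint8_t *p = inline_decode(&c, 4);
            memcpy(&word, p, sizeof(word));
            printf("first word: %u\n", ntohl(word));
            printf("overread rejected: %s\n",
                   inline_decode(&c, 4) ? "no" : "yes");
            return 0;
    }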
@@ -468,32 +550,57 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
468 __func__, nfl_util, fl->num_fh, fl->first_stripe_index, 550 __func__, nfl_util, fl->num_fh, fl->first_stripe_index,
469 fl->pattern_offset); 551 fl->pattern_offset);
470 552
553 if (!fl->num_fh)
554 goto out_err;
555
471 fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), 556 fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *),
472 GFP_KERNEL); 557 GFP_KERNEL);
473 if (!fl->fh_array) 558 if (!fl->fh_array)
474 return -ENOMEM; 559 goto out_err;
475 560
476 for (i = 0; i < fl->num_fh; i++) { 561 for (i = 0; i < fl->num_fh; i++) {
477 /* Do we want to use a mempool here? */ 562 /* Do we want to use a mempool here? */
478 fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL); 563 fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL);
479 if (!fl->fh_array[i]) { 564 if (!fl->fh_array[i])
480 filelayout_free_fh_array(fl); 565 goto out_err_free;
481 return -ENOMEM; 566
482 } 567 p = xdr_inline_decode(&stream, 4);
568 if (unlikely(!p))
569 goto out_err_free;
483 fl->fh_array[i]->size = be32_to_cpup(p++); 570 fl->fh_array[i]->size = be32_to_cpup(p++);
484 if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { 571 if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) {
485 printk(KERN_ERR "Too big fh %d received %d\n", 572 printk(KERN_ERR "Too big fh %d received %d\n",
486 i, fl->fh_array[i]->size); 573 i, fl->fh_array[i]->size);
487 filelayout_free_fh_array(fl); 574 goto out_err_free;
488 return -EIO;
489 } 575 }
576
577 p = xdr_inline_decode(&stream, fl->fh_array[i]->size);
578 if (unlikely(!p))
579 goto out_err_free;
490 memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size); 580 memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size);
491 p += XDR_QUADLEN(fl->fh_array[i]->size);
492 dprintk("DEBUG: %s: fh len %d\n", __func__, 581 dprintk("DEBUG: %s: fh len %d\n", __func__,
493 fl->fh_array[i]->size); 582 fl->fh_array[i]->size);
494 } 583 }
495 584
585 __free_page(scratch);
496 return 0; 586 return 0;
587
588out_err_free:
589 filelayout_free_fh_array(fl);
590out_err:
591 __free_page(scratch);
592 return -EIO;
593}
594
595static void
596filelayout_free_lseg(struct pnfs_layout_segment *lseg)
597{
598 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
599
600 dprintk("--> %s\n", __func__);
601 nfs4_fl_put_deviceid(fl->dsaddr);
602 kfree(fl->commit_buckets);
603 _filelayout_free_lseg(fl);
497} 604}
498 605
499static struct pnfs_layout_segment * 606static struct pnfs_layout_segment *
@@ -514,17 +621,28 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
514 _filelayout_free_lseg(fl); 621 _filelayout_free_lseg(fl);
515 return NULL; 622 return NULL;
516 } 623 }
517 return &fl->generic_hdr;
518}
519 624
520static void 625 /* This assumes there is only one IOMODE_RW lseg. What
521filelayout_free_lseg(struct pnfs_layout_segment *lseg) 626 * we really want to do is have a layout_hdr level
522{ 627 * dictionary of <multipath_list4, fh> keys, each
523 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); 628 * associated with a struct list_head, populated by calls
524 629 * to filelayout_write_pagelist().
525 dprintk("--> %s\n", __func__); 630 * */
526 nfs4_fl_put_deviceid(fl->dsaddr); 631 if ((!fl->commit_through_mds) && (lgr->range.iomode == IOMODE_RW)) {
527 _filelayout_free_lseg(fl); 632 int i;
633 int size = (fl->stripe_type == STRIPE_SPARSE) ?
634 fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
635
636 fl->commit_buckets = kcalloc(size, sizeof(struct list_head), GFP_KERNEL);
637 if (!fl->commit_buckets) {
638 filelayout_free_lseg(&fl->generic_hdr);
639 return NULL;
640 }
641 fl->number_of_buckets = size;
642 for (i = 0; i < size; i++)
643 INIT_LIST_HEAD(&fl->commit_buckets[i]);
644 }
645 return &fl->generic_hdr;
528} 646}
529 647
530/* 648/*
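Each data server gets its own commit bucket so that one COMMIT per server can cover all pages written through it; pages are steered into the buckets allocated above by stripe index (see filelayout_choose_commit_list() further down). A small sketch of the bucket dispatch, with bucket_for() standing in for nfs4_fl_calc_j_index() plus select_bucket_index():

    #include <stdio.h>

    #define NBUCKETS 4

    struct node { int page; struct node *next; };

    static struct node *buckets[NBUCKETS];
    static struct node pool[16];
    static int used;

    static int bucket_for(int page_index, int stripe_unit)
    {
            /* stand-in for the stripe-index math in the patch */
            return (page_index / stripe_unit) % NBUCKETS;
    }

    int main(void)
    {
            for (int pg = 0; pg < 8; pg++) {
                    int b = bucket_for(pg, 2);
                    struct node *n = &pool[used++];

                    n->page = pg;
                    n->next = buckets[b];
                    buckets[b] = n;     /* queue page on its DS bucket */
            }
            for (int b = 0; b < NBUCKETS; b++) {
                    printf("DS bucket %d:", b);
                    for (struct node *n = buckets[b]; n; n = n->next)
                            printf(" %d", n->page);
                    putchar('\n');
            }
            return 0;
    }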
@@ -552,6 +670,191 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
552 return (p_stripe == r_stripe); 670 return (p_stripe == r_stripe);
553} 671}
554 672
673static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg)
674{
675 return !FILELAYOUT_LSEG(lseg)->commit_through_mds;
676}
677
678static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
679{
680 if (fl->stripe_type == STRIPE_SPARSE)
681 return nfs4_fl_calc_ds_index(&fl->generic_hdr, j);
682 else
683 return j;
684}
685
686struct list_head *filelayout_choose_commit_list(struct nfs_page *req)
687{
688 struct pnfs_layout_segment *lseg = req->wb_commit_lseg;
689 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
690 u32 i, j;
691 struct list_head *list;
692
693 /* Note that we are calling nfs4_fl_calc_j_index on each page
694 * that ends up being committed to a data server. An attractive
695 * alternative is to add a field to nfs_write_data and nfs_page
696 * to store the value calculated in filelayout_write_pagelist
697 * and just use that here.
698 */
699 j = nfs4_fl_calc_j_index(lseg,
700 (loff_t)req->wb_index << PAGE_CACHE_SHIFT);
701 i = select_bucket_index(fl, j);
702 list = &fl->commit_buckets[i];
703 if (list_empty(list)) {
704 /* Non-empty buckets hold a reference on the lseg */
705 get_lseg(lseg);
706 }
707 return list;
708}
709
710static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
711{
712 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
713
714 if (flseg->stripe_type == STRIPE_SPARSE)
715 return i;
716 else
717 return nfs4_fl_calc_ds_index(lseg, i);
718}
719
720static struct nfs_fh *
721select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i)
722{
723 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
724
725 if (flseg->stripe_type == STRIPE_SPARSE) {
726 if (flseg->num_fh == 1)
727 i = 0;
728 else if (flseg->num_fh == 0)
729 /* Use the MDS OPEN fh set in nfs_read_rpcsetup */
730 return NULL;
731 }
732 return flseg->fh_array[i];
733}
734
735static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
736{
737 struct pnfs_layout_segment *lseg = data->lseg;
738 struct nfs4_pnfs_ds *ds;
739 u32 idx;
740 struct nfs_fh *fh;
741
742 idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
743 ds = nfs4_fl_prepare_ds(lseg, idx);
744 if (!ds) {
745 printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
746 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
747 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
748 prepare_to_resend_writes(data);
749 data->mds_ops->rpc_release(data);
750 return -EAGAIN;
751 }
752 dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how);
753 data->write_done_cb = filelayout_commit_done_cb;
754 data->ds_clp = ds->ds_clp;
755 fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
756 if (fh)
757 data->args.fh = fh;
758 return nfs_initiate_commit(data, ds->ds_clp->cl_rpcclient,
759 &filelayout_commit_call_ops, how);
760}
761
762/*
763 * This is only useful while we are using whole file layouts.
764 */
765static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode)
766{
767 struct pnfs_layout_segment *lseg, *rv = NULL;
768
769 spin_lock(&inode->i_lock);
770 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
771 if (lseg->pls_range.iomode == IOMODE_RW)
772 rv = get_lseg(lseg);
773 spin_unlock(&inode->i_lock);
774 return rv;
775}
776
777static int alloc_ds_commits(struct inode *inode, struct list_head *list)
778{
779 struct pnfs_layout_segment *lseg;
780 struct nfs4_filelayout_segment *fl;
781 struct nfs_write_data *data;
782 int i, j;
783
 784 /* Won't need this when non-whole file layout segments are supported;
785 * instead we will use a pnfs_layout_hdr structure */
786 lseg = find_only_write_lseg(inode);
787 if (!lseg)
788 return 0;
789 fl = FILELAYOUT_LSEG(lseg);
790 for (i = 0; i < fl->number_of_buckets; i++) {
791 if (list_empty(&fl->commit_buckets[i]))
792 continue;
793 data = nfs_commitdata_alloc();
794 if (!data)
795 goto out_bad;
796 data->ds_commit_index = i;
797 data->lseg = lseg;
798 list_add(&data->pages, list);
799 }
800 put_lseg(lseg);
801 return 0;
802
803out_bad:
804 for (j = i; j < fl->number_of_buckets; j++) {
 805 if (list_empty(&fl->commit_buckets[j]))
 806 continue;
 807 nfs_retry_commit(&fl->commit_buckets[j], lseg);
808 put_lseg(lseg); /* associated with emptying bucket */
809 }
810 put_lseg(lseg);
811 /* Caller will clean up entries put on list */
812 return -ENOMEM;
813}
814
815/* This follows nfs_commit_list pretty closely */
816static int
817filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
818 int how)
819{
820 struct nfs_write_data *data, *tmp;
821 LIST_HEAD(list);
822
823 if (!list_empty(mds_pages)) {
824 data = nfs_commitdata_alloc();
825 if (!data)
826 goto out_bad;
827 data->lseg = NULL;
828 list_add(&data->pages, &list);
829 }
830
831 if (alloc_ds_commits(inode, &list))
832 goto out_bad;
833
834 list_for_each_entry_safe(data, tmp, &list, pages) {
835 list_del_init(&data->pages);
836 atomic_inc(&NFS_I(inode)->commits_outstanding);
837 if (!data->lseg) {
838 nfs_init_commit(data, mds_pages, NULL);
839 nfs_initiate_commit(data, NFS_CLIENT(inode),
840 data->mds_ops, how);
841 } else {
842 nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index], data->lseg);
843 filelayout_initiate_commit(data, how);
844 }
845 }
846 return 0;
847 out_bad:
848 list_for_each_entry_safe(data, tmp, &list, pages) {
849 nfs_retry_commit(&data->pages, data->lseg);
850 list_del_init(&data->pages);
851 nfs_commit_free(data);
852 }
853 nfs_retry_commit(mds_pages, NULL);
854 nfs_commit_clear_lock(NFS_I(inode));
855 return -ENOMEM;
856}
857
555static struct pnfs_layoutdriver_type filelayout_type = { 858static struct pnfs_layoutdriver_type filelayout_type = {
556 .id = LAYOUT_NFSV4_1_FILES, 859 .id = LAYOUT_NFSV4_1_FILES,
557 .name = "LAYOUT_NFSV4_1_FILES", 860 .name = "LAYOUT_NFSV4_1_FILES",
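filelayout_commit_pagelist() fans one logical commit out into one RPC per non-empty bucket plus an optional MDS commit, and commits_outstanding ensures nfs_commit_clear_lock() runs only when the last of them completes. A userspace analogue of that counting scheme (hypothetical names):

    #include <stdatomic.h>
    #include <stdio.h>

    static _Atomic int commits_outstanding;

    static void commit_dispatched(void)
    {
            atomic_fetch_add(&commits_outstanding, 1);
    }

    static void commit_done(void)
    {
            /* fetch_sub returns the old value: 1 means we were last. */
            if (atomic_fetch_sub(&commits_outstanding, 1) == 1)
                    puts("last commit back -> nfs_commit_clear_lock()");
    }

    int main(void)
    {
            for (int ds = 0; ds < 3; ds++)      /* one COMMIT per DS bucket */
                    commit_dispatched();
            for (int ds = 0; ds < 3; ds++)
                    commit_done();
            return 0;
    }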
@@ -559,6 +862,9 @@ static struct pnfs_layoutdriver_type filelayout_type = {
559 .alloc_lseg = filelayout_alloc_lseg, 862 .alloc_lseg = filelayout_alloc_lseg,
560 .free_lseg = filelayout_free_lseg, 863 .free_lseg = filelayout_free_lseg,
561 .pg_test = filelayout_pg_test, 864 .pg_test = filelayout_pg_test,
865 .mark_pnfs_commit = filelayout_mark_pnfs_commit,
866 .choose_commit_list = filelayout_choose_commit_list,
867 .commit_pagelist = filelayout_commit_pagelist,
562 .read_pagelist = filelayout_read_pagelist, 868 .read_pagelist = filelayout_read_pagelist,
563 .write_pagelist = filelayout_write_pagelist, 869 .write_pagelist = filelayout_write_pagelist,
564}; 870};
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index ee0c907742b5..7c44579f5832 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -33,7 +33,7 @@
33#include "pnfs.h" 33#include "pnfs.h"
34 34
35/* 35/*
36 * Field testing shows we need to support upto 4096 stripe indices. 36 * Field testing shows we need to support up to 4096 stripe indices.
37 * We store each index as a u8 (u32 on the wire) to keep the memory footprint 37 * We store each index as a u8 (u32 on the wire) to keep the memory footprint
38 * reasonable. This in turn means we support a maximum of 256 38 * reasonable. This in turn means we support a maximum of 256
39 * RFC 5661 multipath_list4 structures. 39 * RFC 5661 multipath_list4 structures.
@@ -79,6 +79,8 @@ struct nfs4_filelayout_segment {
79 struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ 79 struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
80 unsigned int num_fh; 80 unsigned int num_fh;
81 struct nfs_fh **fh_array; 81 struct nfs_fh **fh_array;
82 struct list_head *commit_buckets; /* Sort commits to ds */
83 int number_of_buckets;
82}; 84};
83 85
84static inline struct nfs4_filelayout_segment * 86static inline struct nfs4_filelayout_segment *
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 68143c162e3b..de5350f2b249 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -261,7 +261,7 @@ out:
261 * Currently only support ipv4, and one multi-path address. 261 * Currently only support ipv4, and one multi-path address.
262 */ 262 */
263static struct nfs4_pnfs_ds * 263static struct nfs4_pnfs_ds *
264decode_and_add_ds(__be32 **pp, struct inode *inode) 264decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode)
265{ 265{
266 struct nfs4_pnfs_ds *ds = NULL; 266 struct nfs4_pnfs_ds *ds = NULL;
267 char *buf; 267 char *buf;
@@ -269,25 +269,34 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
269 u32 ip_addr, port; 269 u32 ip_addr, port;
270 int nlen, rlen, i; 270 int nlen, rlen, i;
271 int tmp[2]; 271 int tmp[2];
272 __be32 *r_netid, *r_addr, *p = *pp; 272 __be32 *p;
273 273
274 /* r_netid */ 274 /* r_netid */
275 p = xdr_inline_decode(streamp, 4);
276 if (unlikely(!p))
277 goto out_err;
275 nlen = be32_to_cpup(p++); 278 nlen = be32_to_cpup(p++);
276 r_netid = p;
277 p += XDR_QUADLEN(nlen);
278 279
279 /* r_addr */ 280 p = xdr_inline_decode(streamp, nlen);
280 rlen = be32_to_cpup(p++); 281 if (unlikely(!p))
281 r_addr = p; 282 goto out_err;
282 p += XDR_QUADLEN(rlen);
283 *pp = p;
284 283
285 /* Check that netid is "tcp" */ 284 /* Check that netid is "tcp" */
286 if (nlen != 3 || memcmp((char *)r_netid, "tcp", 3)) { 285 if (nlen != 3 || memcmp((char *)p, "tcp", 3)) {
287 dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__); 286 dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__);
288 goto out_err; 287 goto out_err;
289 } 288 }
290 289
290 /* r_addr */
291 p = xdr_inline_decode(streamp, 4);
292 if (unlikely(!p))
293 goto out_err;
294 rlen = be32_to_cpup(p);
295
296 p = xdr_inline_decode(streamp, rlen);
297 if (unlikely(!p))
298 goto out_err;
299
291 /* ipv6 length plus port is legal */ 300 /* ipv6 length plus port is legal */
292 if (rlen > INET6_ADDRSTRLEN + 8) { 301 if (rlen > INET6_ADDRSTRLEN + 8) {
293 dprintk("%s: Invalid address, length %d\n", __func__, 302 dprintk("%s: Invalid address, length %d\n", __func__,
@@ -300,7 +309,7 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
300 goto out_err; 309 goto out_err;
301 } 310 }
302 buf[rlen] = '\0'; 311 buf[rlen] = '\0';
303 memcpy(buf, r_addr, rlen); 312 memcpy(buf, p, rlen);
304 313
305 /* replace the port dots with dashes for the in4_pton() delimiter*/ 314 /* replace the port dots with dashes for the in4_pton() delimiter*/
306 for (i = 0; i < 2; i++) { 315 for (i = 0; i < 2; i++) {
@@ -336,90 +345,154 @@ out_err:
336static struct nfs4_file_layout_dsaddr* 345static struct nfs4_file_layout_dsaddr*
337decode_device(struct inode *ino, struct pnfs_device *pdev) 346decode_device(struct inode *ino, struct pnfs_device *pdev)
338{ 347{
339 int i, dummy; 348 int i;
340 u32 cnt, num; 349 u32 cnt, num;
341 u8 *indexp; 350 u8 *indexp;
342 __be32 *p = (__be32 *)pdev->area, *indicesp; 351 __be32 *p;
343 struct nfs4_file_layout_dsaddr *dsaddr; 352 u8 *stripe_indices;
353 u8 max_stripe_index;
354 struct nfs4_file_layout_dsaddr *dsaddr = NULL;
355 struct xdr_stream stream;
356 struct xdr_buf buf = {
357 .pages = pdev->pages,
358 .page_len = pdev->pglen,
359 .buflen = pdev->pglen,
360 .len = pdev->pglen,
361 };
362 struct page *scratch;
363
364 /* set up xdr stream */
365 scratch = alloc_page(GFP_KERNEL);
366 if (!scratch)
367 goto out_err;
368
369 xdr_init_decode(&stream, &buf, NULL);
370 xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
344 371
345 /* Get the stripe count (number of stripe index) */ 372 /* Get the stripe count (number of stripe index) */
346 cnt = be32_to_cpup(p++); 373 p = xdr_inline_decode(&stream, 4);
374 if (unlikely(!p))
375 goto out_err_free_scratch;
376
377 cnt = be32_to_cpup(p);
347 dprintk("%s stripe count %d\n", __func__, cnt); 378 dprintk("%s stripe count %d\n", __func__, cnt);
348 if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) { 379 if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
349 printk(KERN_WARNING "%s: stripe count %d greater than " 380 printk(KERN_WARNING "%s: stripe count %d greater than "
350 "supported maximum %d\n", __func__, 381 "supported maximum %d\n", __func__,
351 cnt, NFS4_PNFS_MAX_STRIPE_CNT); 382 cnt, NFS4_PNFS_MAX_STRIPE_CNT);
352 goto out_err; 383 goto out_err_free_scratch;
384 }
385
386 /* read stripe indices */
387 stripe_indices = kcalloc(cnt, sizeof(u8), GFP_KERNEL);
388 if (!stripe_indices)
389 goto out_err_free_scratch;
390
391 p = xdr_inline_decode(&stream, cnt << 2);
392 if (unlikely(!p))
393 goto out_err_free_stripe_indices;
394
395 indexp = &stripe_indices[0];
396 max_stripe_index = 0;
397 for (i = 0; i < cnt; i++) {
398 *indexp = be32_to_cpup(p++);
399 max_stripe_index = max(max_stripe_index, *indexp);
400 indexp++;
353 } 401 }
354 402
355 /* Check the multipath list count */ 403 /* Check the multipath list count */
356 indicesp = p; 404 p = xdr_inline_decode(&stream, 4);
357 p += XDR_QUADLEN(cnt << 2); 405 if (unlikely(!p))
358 num = be32_to_cpup(p++); 406 goto out_err_free_stripe_indices;
407
408 num = be32_to_cpup(p);
359 dprintk("%s ds_num %u\n", __func__, num); 409 dprintk("%s ds_num %u\n", __func__, num);
360 if (num > NFS4_PNFS_MAX_MULTI_CNT) { 410 if (num > NFS4_PNFS_MAX_MULTI_CNT) {
361 printk(KERN_WARNING "%s: multipath count %d greater than " 411 printk(KERN_WARNING "%s: multipath count %d greater than "
362 "supported maximum %d\n", __func__, 412 "supported maximum %d\n", __func__,
363 num, NFS4_PNFS_MAX_MULTI_CNT); 413 num, NFS4_PNFS_MAX_MULTI_CNT);
364 goto out_err; 414 goto out_err_free_stripe_indices;
365 } 415 }
416
417 /* validate stripe indices are all < num */
418 if (max_stripe_index >= num) {
419 printk(KERN_WARNING "%s: stripe index %u >= num ds %u\n",
420 __func__, max_stripe_index, num);
421 goto out_err_free_stripe_indices;
422 }
423
366 dsaddr = kzalloc(sizeof(*dsaddr) + 424 dsaddr = kzalloc(sizeof(*dsaddr) +
367 (sizeof(struct nfs4_pnfs_ds *) * (num - 1)), 425 (sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
368 GFP_KERNEL); 426 GFP_KERNEL);
369 if (!dsaddr) 427 if (!dsaddr)
370 goto out_err; 428 goto out_err_free_stripe_indices;
371
372 dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL);
373 if (!dsaddr->stripe_indices)
374 goto out_err_free;
375 429
376 dsaddr->stripe_count = cnt; 430 dsaddr->stripe_count = cnt;
431 dsaddr->stripe_indices = stripe_indices;
432 stripe_indices = NULL;
377 dsaddr->ds_num = num; 433 dsaddr->ds_num = num;
378 434
379 memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id)); 435 memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id));
380 436
381 /* Go back an read stripe indices */
382 p = indicesp;
383 indexp = &dsaddr->stripe_indices[0];
384 for (i = 0; i < dsaddr->stripe_count; i++) {
385 *indexp = be32_to_cpup(p++);
386 if (*indexp >= num)
387 goto out_err_free;
388 indexp++;
389 }
390 /* Skip already read multipath list count */
391 p++;
392
393 for (i = 0; i < dsaddr->ds_num; i++) { 437 for (i = 0; i < dsaddr->ds_num; i++) {
394 int j; 438 int j;
439 u32 mp_count;
440
441 p = xdr_inline_decode(&stream, 4);
442 if (unlikely(!p))
443 goto out_err_free_deviceid;
395 444
396 dummy = be32_to_cpup(p++); /* multipath count */ 445 mp_count = be32_to_cpup(p); /* multipath count */
397 if (dummy > 1) { 446 if (mp_count > 1) {
398 printk(KERN_WARNING 447 printk(KERN_WARNING
399 "%s: Multipath count %d not supported, " 448 "%s: Multipath count %d not supported, "
400 "skipping all greater than 1\n", __func__, 449 "skipping all greater than 1\n", __func__,
401 dummy); 450 mp_count);
402 } 451 }
403 for (j = 0; j < dummy; j++) { 452 for (j = 0; j < mp_count; j++) {
404 if (j == 0) { 453 if (j == 0) {
405 dsaddr->ds_list[i] = decode_and_add_ds(&p, ino); 454 dsaddr->ds_list[i] = decode_and_add_ds(&stream,
455 ino);
406 if (dsaddr->ds_list[i] == NULL) 456 if (dsaddr->ds_list[i] == NULL)
407 goto out_err_free; 457 goto out_err_free_deviceid;
408 } else { 458 } else {
409 u32 len; 459 u32 len;
410 /* skip extra multipath */ 460 /* skip extra multipath */
411 len = be32_to_cpup(p++); 461
412 p += XDR_QUADLEN(len); 462 /* read len, skip */
413 len = be32_to_cpup(p++); 463 p = xdr_inline_decode(&stream, 4);
414 p += XDR_QUADLEN(len); 464 if (unlikely(!p))
415 continue; 465 goto out_err_free_deviceid;
466 len = be32_to_cpup(p);
467
468 p = xdr_inline_decode(&stream, len);
469 if (unlikely(!p))
470 goto out_err_free_deviceid;
471
472 /* read len, skip */
473 p = xdr_inline_decode(&stream, 4);
474 if (unlikely(!p))
475 goto out_err_free_deviceid;
476 len = be32_to_cpup(p);
477
478 p = xdr_inline_decode(&stream, len);
479 if (unlikely(!p))
480 goto out_err_free_deviceid;
416 } 481 }
417 } 482 }
418 } 483 }
484
485 __free_page(scratch);
419 return dsaddr; 486 return dsaddr;
420 487
421out_err_free: 488out_err_free_deviceid:
422 nfs4_fl_free_deviceid(dsaddr); 489 nfs4_fl_free_deviceid(dsaddr);
 490 /* stripe_indices was part of dsaddr */
491 goto out_err_free_scratch;
492out_err_free_stripe_indices:
493 kfree(stripe_indices);
494out_err_free_scratch:
495 __free_page(scratch);
423out_err: 496out_err:
424 dprintk("%s ERROR: returning NULL\n", __func__); 497 dprintk("%s ERROR: returning NULL\n", __func__);
425 return NULL; 498 return NULL;
@@ -498,11 +571,6 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
498 goto out_free; 571 goto out_free;
499 } 572 }
500 573
501 /* set pdev->area */
502 pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
503 if (!pdev->area)
504 goto out_free;
505
506 memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id)); 574 memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id));
507 pdev->layout_type = LAYOUT_NFSV4_1_FILES; 575 pdev->layout_type = LAYOUT_NFSV4_1_FILES;
508 pdev->pages = pages; 576 pdev->pages = pages;
@@ -521,8 +589,6 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
521 */ 589 */
522 dsaddr = decode_and_add_device(inode, pdev); 590 dsaddr = decode_and_add_device(inode, pdev);
523out_free: 591out_free:
524 if (pdev->area != NULL)
525 vunmap(pdev->area);
526 for (i = 0; i < max_pages; i++) 592 for (i = 0; i < max_pages; i++)
527 __free_page(pages[i]); 593 __free_page(pages[i]);
528 kfree(pages); 594 kfree(pages);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1d84e7088af9..9bf41eab3e46 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -41,6 +41,7 @@
41#include <linux/string.h> 41#include <linux/string.h>
42#include <linux/slab.h> 42#include <linux/slab.h>
43#include <linux/sunrpc/clnt.h> 43#include <linux/sunrpc/clnt.h>
44#include <linux/sunrpc/gss_api.h>
44#include <linux/nfs.h> 45#include <linux/nfs.h>
45#include <linux/nfs4.h> 46#include <linux/nfs4.h>
46#include <linux/nfs_fs.h> 47#include <linux/nfs_fs.h>
@@ -71,7 +72,9 @@ static int _nfs4_proc_open(struct nfs4_opendata *data);
71static int _nfs4_recover_proc_open(struct nfs4_opendata *data); 72static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
72static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); 73static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
73static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); 74static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
74static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr); 75static int _nfs4_proc_lookup(struct rpc_clnt *client, struct inode *dir,
76 const struct qstr *name, struct nfs_fh *fhandle,
77 struct nfs_fattr *fattr);
75static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); 78static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
76static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, 79static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
77 struct nfs_fattr *fattr, struct iattr *sattr, 80 struct nfs_fattr *fattr, struct iattr *sattr,
@@ -85,6 +88,8 @@ static int nfs4_map_errors(int err)
85 switch (err) { 88 switch (err) {
86 case -NFS4ERR_RESOURCE: 89 case -NFS4ERR_RESOURCE:
87 return -EREMOTEIO; 90 return -EREMOTEIO;
91 case -NFS4ERR_WRONGSEC:
92 return -EPERM;
88 case -NFS4ERR_BADOWNER: 93 case -NFS4ERR_BADOWNER:
89 case -NFS4ERR_BADNAME: 94 case -NFS4ERR_BADNAME:
90 return -EINVAL; 95 return -EINVAL;
@@ -657,7 +662,8 @@ struct rpc_call_ops nfs41_call_priv_sync_ops = {
657 .rpc_call_done = nfs41_call_sync_done, 662 .rpc_call_done = nfs41_call_sync_done,
658}; 663};
659 664
660static int nfs4_call_sync_sequence(struct nfs_server *server, 665static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
666 struct nfs_server *server,
661 struct rpc_message *msg, 667 struct rpc_message *msg,
662 struct nfs4_sequence_args *args, 668 struct nfs4_sequence_args *args,
663 struct nfs4_sequence_res *res, 669 struct nfs4_sequence_res *res,
@@ -673,7 +679,7 @@ static int nfs4_call_sync_sequence(struct nfs_server *server,
673 .cache_reply = cache_reply, 679 .cache_reply = cache_reply,
674 }; 680 };
675 struct rpc_task_setup task_setup = { 681 struct rpc_task_setup task_setup = {
676 .rpc_client = server->client, 682 .rpc_client = clnt,
677 .rpc_message = msg, 683 .rpc_message = msg,
678 .callback_ops = &nfs41_call_sync_ops, 684 .callback_ops = &nfs41_call_sync_ops,
679 .callback_data = &data 685 .callback_data = &data
@@ -692,13 +698,14 @@ static int nfs4_call_sync_sequence(struct nfs_server *server,
692 return ret; 698 return ret;
693} 699}
694 700
695int _nfs4_call_sync_session(struct nfs_server *server, 701int _nfs4_call_sync_session(struct rpc_clnt *clnt,
702 struct nfs_server *server,
696 struct rpc_message *msg, 703 struct rpc_message *msg,
697 struct nfs4_sequence_args *args, 704 struct nfs4_sequence_args *args,
698 struct nfs4_sequence_res *res, 705 struct nfs4_sequence_res *res,
699 int cache_reply) 706 int cache_reply)
700{ 707{
701 return nfs4_call_sync_sequence(server, msg, args, res, cache_reply, 0); 708 return nfs4_call_sync_sequence(clnt, server, msg, args, res, cache_reply, 0);
702} 709}
703 710
704#else 711#else
@@ -709,19 +716,28 @@ static int nfs4_sequence_done(struct rpc_task *task,
709} 716}
710#endif /* CONFIG_NFS_V4_1 */ 717#endif /* CONFIG_NFS_V4_1 */
711 718
712int _nfs4_call_sync(struct nfs_server *server, 719int _nfs4_call_sync(struct rpc_clnt *clnt,
720 struct nfs_server *server,
713 struct rpc_message *msg, 721 struct rpc_message *msg,
714 struct nfs4_sequence_args *args, 722 struct nfs4_sequence_args *args,
715 struct nfs4_sequence_res *res, 723 struct nfs4_sequence_res *res,
716 int cache_reply) 724 int cache_reply)
717{ 725{
718 args->sa_session = res->sr_session = NULL; 726 args->sa_session = res->sr_session = NULL;
719 return rpc_call_sync(server->client, msg, 0); 727 return rpc_call_sync(clnt, msg, 0);
720} 728}
721 729
722#define nfs4_call_sync(server, msg, args, res, cache_reply) \ 730static inline
723 (server)->nfs_client->cl_mvops->call_sync((server), (msg), &(args)->seq_args, \ 731int nfs4_call_sync(struct rpc_clnt *clnt,
724 &(res)->seq_res, (cache_reply)) 732 struct nfs_server *server,
733 struct rpc_message *msg,
734 struct nfs4_sequence_args *args,
735 struct nfs4_sequence_res *res,
736 int cache_reply)
737{
738 return server->nfs_client->cl_mvops->call_sync(clnt, server, msg,
739 args, res, cache_reply);
740}
725 741
726static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) 742static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
727{ 743{
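Turning the nfs4_call_sync() macro into a static inline presumably buys argument type checking and single evaluation. A toy contrast between the two (not kernel code):

    #include <stdio.h>

    static int calls;
    static int next_val(void) { return ++calls; }

    #define DOUBLE_MACRO(x) ((x) + (x))          /* expands its argument twice */

    static inline int double_inline(int x) { return x + x; }

    int main(void)
    {
            calls = 0;
            (void)DOUBLE_MACRO(next_val());      /* next_val() runs twice */
            printf("macro evaluations: %d\n", calls);   /* 2 */

            calls = 0;
            (void)double_inline(next_val());     /* next_val() runs once */
            printf("inline evaluations: %d\n", calls);  /* 1 */
            return 0;
    }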
@@ -1831,7 +1847,7 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
1831 } else 1847 } else
1832 memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); 1848 memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
1833 1849
1834 status = nfs4_call_sync(server, &msg, &arg, &res, 1); 1850 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
1835 if (status == 0 && state != NULL) 1851 if (status == 0 && state != NULL)
1836 renew_lease(server, timestamp); 1852 renew_lease(server, timestamp);
1837 return status; 1853 return status;
@@ -2090,7 +2106,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
2090 }; 2106 };
2091 int status; 2107 int status;
2092 2108
2093 status = nfs4_call_sync(server, &msg, &args, &res, 0); 2109 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
2094 if (status == 0) { 2110 if (status == 0) {
2095 memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); 2111 memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
2096 server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS| 2112 server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS|
@@ -2160,7 +2176,7 @@ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
2160 }; 2176 };
2161 2177
2162 nfs_fattr_init(info->fattr); 2178 nfs_fattr_init(info->fattr);
2163 return nfs4_call_sync(server, &msg, &args, &res, 0); 2179 return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
2164} 2180}
2165 2181
2166static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, 2182static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
@@ -2176,15 +2192,41 @@ static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
2176 return err; 2192 return err;
2177} 2193}
2178 2194
2195static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
2196 struct nfs_fsinfo *info, rpc_authflavor_t flavor)
2197{
2198 struct rpc_auth *auth;
2199 int ret;
2200
2201 auth = rpcauth_create(flavor, server->client);
2202 if (!auth) {
2203 ret = -EIO;
2204 goto out;
2205 }
2206 ret = nfs4_lookup_root(server, fhandle, info);
2207out:
2208 return ret;
2209}
2210
2179/* 2211/*
2180 * get the file handle for the "/" directory on the server 2212 * get the file handle for the "/" directory on the server
2181 */ 2213 */
2182static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, 2214static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
2183 struct nfs_fsinfo *info) 2215 struct nfs_fsinfo *info)
2184{ 2216{
2185 int status; 2217 int i, len, status = 0;
2218 rpc_authflavor_t flav_array[NFS_MAX_SECFLAVORS + 2];
2186 2219
2187 status = nfs4_lookup_root(server, fhandle, info); 2220 flav_array[0] = RPC_AUTH_UNIX;
2221 len = gss_mech_list_pseudoflavors(&flav_array[1]);
2222 flav_array[1+len] = RPC_AUTH_NULL;
2223 len += 2;
2224
2225 for (i = 0; i < len; i++) {
2226 status = nfs4_lookup_root_sec(server, fhandle, info, flav_array[i]);
2227 if (status != -EPERM)
2228 break;
2229 }
2188 if (status == 0) 2230 if (status == 0)
2189 status = nfs4_server_capabilities(server, fhandle); 2231 status = nfs4_server_capabilities(server, fhandle);
2190 if (status == 0) 2232 if (status == 0)
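The loop above probes the root filehandle with AUTH_UNIX first, then every GSS pseudoflavor the client supports, then AUTH_NULL, stopping at the first flavor the server does not reject with -EPERM. A compact sketch of that probe order, with lookup_root() standing in for nfs4_lookup_root_sec():

    #include <errno.h>
    #include <stdio.h>

    static int lookup_root(int flavor)
    {
            return flavor == 390003 ? 0 : -EPERM;   /* server insists on krb5 */
    }

    int main(void)
    {
            int flavors[8];
            int len = 0;

            flavors[len++] = 1;        /* RPC_AUTH_UNIX first */
            flavors[len++] = 390003;   /* from gss_mech_list_pseudoflavors() */
            flavors[len++] = 0;        /* RPC_AUTH_NULL last */

            int status = -EPERM;
            for (int i = 0; i < len; i++) {
                    status = lookup_root(flavors[i]);
                    if (status != -EPERM)
                            break;     /* first accepted flavor wins */
            }
            printf("root lookup status: %d\n", status);
            return 0;
    }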
@@ -2249,7 +2291,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
2249 }; 2291 };
2250 2292
2251 nfs_fattr_init(fattr); 2293 nfs_fattr_init(fattr);
2252 return nfs4_call_sync(server, &msg, &args, &res, 0); 2294 return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
2253} 2295}
2254 2296
2255static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) 2297static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr)
@@ -2309,9 +2351,9 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
2309 return status; 2351 return status;
2310} 2352}
2311 2353
2312static int _nfs4_proc_lookupfh(struct nfs_server *server, const struct nfs_fh *dirfh, 2354static int _nfs4_proc_lookupfh(struct rpc_clnt *clnt, struct nfs_server *server,
2313 const struct qstr *name, struct nfs_fh *fhandle, 2355 const struct nfs_fh *dirfh, const struct qstr *name,
2314 struct nfs_fattr *fattr) 2356 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
2315{ 2357{
2316 int status; 2358 int status;
2317 struct nfs4_lookup_arg args = { 2359 struct nfs4_lookup_arg args = {
@@ -2333,7 +2375,7 @@ static int _nfs4_proc_lookupfh(struct nfs_server *server, const struct nfs_fh *d
2333 nfs_fattr_init(fattr); 2375 nfs_fattr_init(fattr);
2334 2376
2335 dprintk("NFS call lookupfh %s\n", name->name); 2377 dprintk("NFS call lookupfh %s\n", name->name);
2336 status = nfs4_call_sync(server, &msg, &args, &res, 0); 2378 status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, &res.seq_res, 0);
2337 dprintk("NFS reply lookupfh: %d\n", status); 2379 dprintk("NFS reply lookupfh: %d\n", status);
2338 return status; 2380 return status;
2339} 2381}
@@ -2345,7 +2387,7 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
2345 struct nfs4_exception exception = { }; 2387 struct nfs4_exception exception = { };
2346 int err; 2388 int err;
2347 do { 2389 do {
2348 err = _nfs4_proc_lookupfh(server, dirfh, name, fhandle, fattr); 2390 err = _nfs4_proc_lookupfh(server->client, server, dirfh, name, fhandle, fattr);
2349 /* FIXME: !!!! */ 2391 /* FIXME: !!!! */
2350 if (err == -NFS4ERR_MOVED) { 2392 if (err == -NFS4ERR_MOVED) {
2351 err = -EREMOTE; 2393 err = -EREMOTE;
@@ -2356,27 +2398,41 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
2356 return err; 2398 return err;
2357} 2399}
2358 2400
2359static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, 2401static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
2360 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 2402 const struct qstr *name, struct nfs_fh *fhandle,
2403 struct nfs_fattr *fattr)
2361{ 2404{
2362 int status; 2405 int status;
2363 2406
2364 dprintk("NFS call lookup %s\n", name->name); 2407 dprintk("NFS call lookup %s\n", name->name);
2365 status = _nfs4_proc_lookupfh(NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr); 2408 status = _nfs4_proc_lookupfh(clnt, NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr);
2366 if (status == -NFS4ERR_MOVED) 2409 if (status == -NFS4ERR_MOVED)
2367 status = nfs4_get_referral(dir, name, fattr, fhandle); 2410 status = nfs4_get_referral(dir, name, fattr, fhandle);
2368 dprintk("NFS reply lookup: %d\n", status); 2411 dprintk("NFS reply lookup: %d\n", status);
2369 return status; 2412 return status;
2370} 2413}
2371 2414
2372static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) 2415void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr, struct nfs_fh *fh)
2416{
2417 memset(fh, 0, sizeof(struct nfs_fh));
2418 fattr->fsid.major = 1;
2419 fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE |
2420 NFS_ATTR_FATTR_NLINK | NFS_ATTR_FATTR_FSID | NFS_ATTR_FATTR_MOUNTPOINT;
2421 fattr->mode = S_IFDIR | S_IRUGO | S_IXUGO;
2422 fattr->nlink = 2;
2423}
2424
2425static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name,
2426 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
2373{ 2427{
2374 struct nfs4_exception exception = { }; 2428 struct nfs4_exception exception = { };
2375 int err; 2429 int err;
2376 do { 2430 do {
2377 err = nfs4_handle_exception(NFS_SERVER(dir), 2431 err = nfs4_handle_exception(NFS_SERVER(dir),
2378 _nfs4_proc_lookup(dir, name, fhandle, fattr), 2432 _nfs4_proc_lookup(clnt, dir, name, fhandle, fattr),
2379 &exception); 2433 &exception);
2434 if (err == -EPERM)
2435 nfs_fixup_secinfo_attributes(fattr, fhandle);
2380 } while (exception.retry); 2436 } while (exception.retry);
2381 return err; 2437 return err;
2382} 2438}
@@ -2421,7 +2477,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
2421 if (res.fattr == NULL) 2477 if (res.fattr == NULL)
2422 return -ENOMEM; 2478 return -ENOMEM;
2423 2479
2424 status = nfs4_call_sync(server, &msg, &args, &res, 0); 2480 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
2425 if (!status) { 2481 if (!status) {
2426 entry->mask = 0; 2482 entry->mask = 0;
2427 if (res.access & NFS4_ACCESS_READ) 2483 if (res.access & NFS4_ACCESS_READ)
@@ -2488,7 +2544,7 @@ static int _nfs4_proc_readlink(struct inode *inode, struct page *page,
2488 .rpc_resp = &res, 2544 .rpc_resp = &res,
2489 }; 2545 };
2490 2546
2491 return nfs4_call_sync(NFS_SERVER(inode), &msg, &args, &res, 0); 2547 return nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), &msg, &args.seq_args, &res.seq_res, 0);
2492} 2548}
2493 2549
2494static int nfs4_proc_readlink(struct inode *inode, struct page *page, 2550static int nfs4_proc_readlink(struct inode *inode, struct page *page,
@@ -2577,7 +2633,7 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
2577 if (res.dir_attr == NULL) 2633 if (res.dir_attr == NULL)
2578 goto out; 2634 goto out;
2579 2635
2580 status = nfs4_call_sync(server, &msg, &args, &res, 1); 2636 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
2581 if (status == 0) { 2637 if (status == 0) {
2582 update_changeattr(dir, &res.cinfo); 2638 update_changeattr(dir, &res.cinfo);
2583 nfs_post_op_update_inode(dir, res.dir_attr); 2639 nfs_post_op_update_inode(dir, res.dir_attr);
@@ -2678,7 +2734,7 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
2678 if (res.old_fattr == NULL || res.new_fattr == NULL) 2734 if (res.old_fattr == NULL || res.new_fattr == NULL)
2679 goto out; 2735 goto out;
2680 2736
2681 status = nfs4_call_sync(server, &msg, &arg, &res, 1); 2737 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
2682 if (!status) { 2738 if (!status) {
2683 update_changeattr(old_dir, &res.old_cinfo); 2739 update_changeattr(old_dir, &res.old_cinfo);
2684 nfs_post_op_update_inode(old_dir, res.old_fattr); 2740 nfs_post_op_update_inode(old_dir, res.old_fattr);
@@ -2729,7 +2785,7 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *
2729 if (res.fattr == NULL || res.dir_attr == NULL) 2785 if (res.fattr == NULL || res.dir_attr == NULL)
2730 goto out; 2786 goto out;
2731 2787
2732 status = nfs4_call_sync(server, &msg, &arg, &res, 1); 2788 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
2733 if (!status) { 2789 if (!status) {
2734 update_changeattr(dir, &res.cinfo); 2790 update_changeattr(dir, &res.cinfo);
2735 nfs_post_op_update_inode(dir, res.dir_attr); 2791 nfs_post_op_update_inode(dir, res.dir_attr);
@@ -2792,8 +2848,8 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
2792 2848
2793static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data) 2849static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data)
2794{ 2850{
2795 int status = nfs4_call_sync(NFS_SERVER(dir), &data->msg, 2851 int status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &data->msg,
2796 &data->arg, &data->res, 1); 2852 &data->arg.seq_args, &data->res.seq_res, 1);
2797 if (status == 0) { 2853 if (status == 0) {
2798 update_changeattr(dir, &data->res.dir_cinfo); 2854 update_changeattr(dir, &data->res.dir_cinfo);
2799 nfs_post_op_update_inode(dir, data->res.dir_fattr); 2855 nfs_post_op_update_inode(dir, data->res.dir_fattr);
@@ -2905,7 +2961,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
2905 (unsigned long long)cookie); 2961 (unsigned long long)cookie);
2906 nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); 2962 nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args);
2907 res.pgbase = args.pgbase; 2963 res.pgbase = args.pgbase;
2908 status = nfs4_call_sync(NFS_SERVER(dir), &msg, &args, &res, 0); 2964 status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0);
2909 if (status >= 0) { 2965 if (status >= 0) {
2910 memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); 2966 memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE);
2911 status += args.pgbase; 2967 status += args.pgbase;
@@ -2997,7 +3053,7 @@ static int _nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
2997 }; 3053 };
2998 3054
2999 nfs_fattr_init(fsstat->fattr); 3055 nfs_fattr_init(fsstat->fattr);
3000 return nfs4_call_sync(server, &msg, &args, &res, 0); 3056 return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
3001} 3057}
3002 3058
3003static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat) 3059static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat)
@@ -3028,7 +3084,7 @@ static int _nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
3028 .rpc_resp = &res, 3084 .rpc_resp = &res,
3029 }; 3085 };
3030 3086
3031 return nfs4_call_sync(server, &msg, &args, &res, 0); 3087 return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
3032} 3088}
3033 3089
3034static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) 3090static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo)
@@ -3073,7 +3129,7 @@ static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle
3073 } 3129 }
3074 3130
3075 nfs_fattr_init(pathconf->fattr); 3131 nfs_fattr_init(pathconf->fattr);
3076 return nfs4_call_sync(server, &msg, &args, &res, 0); 3132 return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
3077} 3133}
3078 3134
3079static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, 3135static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
@@ -3195,12 +3251,9 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
3195 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; 3251 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
3196} 3252}
3197 3253
3198static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) 3254static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data)
3199{ 3255{
3200 struct inode *inode = data->inode; 3256 struct inode *inode = data->inode;
3201
3202 if (!nfs4_sequence_done(task, &data->res.seq_res))
3203 return -EAGAIN;
3204 3257
3205 if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { 3258 if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) {
3206 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); 3259 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
@@ -3210,11 +3263,24 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
3210 return 0; 3263 return 0;
3211} 3264}
3212 3265
3266static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
3267{
3268 if (!nfs4_sequence_done(task, &data->res.seq_res))
3269 return -EAGAIN;
3270 return data->write_done_cb(task, data);
3271}
3272
3213static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) 3273static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
3214{ 3274{
3215 struct nfs_server *server = NFS_SERVER(data->inode); 3275 struct nfs_server *server = NFS_SERVER(data->inode);
3216 3276
3217 data->args.bitmask = server->cache_consistency_bitmask; 3277 if (data->lseg) {
3278 data->args.bitmask = NULL;
3279 data->res.fattr = NULL;
3280 } else
3281 data->args.bitmask = server->cache_consistency_bitmask;
3282 if (!data->write_done_cb)
3283 data->write_done_cb = nfs4_commit_done_cb;
3218 data->res.server = server; 3284 data->res.server = server;
3219 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; 3285 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
3220} 3286}
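
Splitting the old nfs4_commit_done() this way keeps session sequencing in one place while letting a layout driver substitute its own completion through data->write_done_cb; the setup path installs nfs4_commit_done_cb only when nothing claimed the slot first. A sketch of the default-callback idiom, with illustrative names:

#include <stdio.h>

struct write_data;
typedef int (*done_cb)(struct write_data *);

struct write_data {
	done_cb write_done_cb;
};

/* Default completion, standing in for nfs4_commit_done_cb(). */
static int default_done(struct write_data *d)
{
	(void)d;
	puts("default (MDS) completion");
	return 0;
}

/* Mirrors nfs4_proc_commit_setup(): respect an already-installed callback. */
static void commit_setup(struct write_data *d)
{
	if (!d->write_done_cb)
		d->write_done_cb = default_done;
}

int main(void)
{
	struct write_data d = { 0 };

	commit_setup(&d);        /* no layout driver override installed */
	return d.write_done_cb(&d);
}
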
@@ -3452,7 +3518,7 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
3452 resp_buf = buf; 3518 resp_buf = buf;
3453 buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); 3519 buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase);
3454 } 3520 }
3455 ret = nfs4_call_sync(NFS_SERVER(inode), &msg, &args, &res, 0); 3521 ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), &msg, &args.seq_args, &res.seq_res, 0);
3456 if (ret) 3522 if (ret)
3457 goto out_free; 3523 goto out_free;
3458 if (res.acl_len > args.acl_len) 3524 if (res.acl_len > args.acl_len)
@@ -3527,7 +3593,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
3527 if (i < 0) 3593 if (i < 0)
3528 return i; 3594 return i;
3529 nfs_inode_return_delegation(inode); 3595 nfs_inode_return_delegation(inode);
3530 ret = nfs4_call_sync(server, &msg, &arg, &res, 1); 3596 ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
3531 3597
3532 /* 3598 /*
3533 * Free each page after tx, so the only ref left is 3599 * Free each page after tx, so the only ref left is
@@ -3890,7 +3956,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
3890 lsp = request->fl_u.nfs4_fl.owner; 3956 lsp = request->fl_u.nfs4_fl.owner;
3891 arg.lock_owner.id = lsp->ls_id.id; 3957 arg.lock_owner.id = lsp->ls_id.id;
3892 arg.lock_owner.s_dev = server->s_dev; 3958 arg.lock_owner.s_dev = server->s_dev;
3893 status = nfs4_call_sync(server, &msg, &arg, &res, 1); 3959 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
3894 switch (status) { 3960 switch (status) {
3895 case 0: 3961 case 0:
3896 request->fl_type = F_UNLCK; 3962 request->fl_type = F_UNLCK;
@@ -4618,12 +4684,46 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
4618 nfs_fattr_init(&fs_locations->fattr); 4684 nfs_fattr_init(&fs_locations->fattr);
4619 fs_locations->server = server; 4685 fs_locations->server = server;
4620 fs_locations->nlocations = 0; 4686 fs_locations->nlocations = 0;
4621 status = nfs4_call_sync(server, &msg, &args, &res, 0); 4687 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
4622 nfs_fixup_referral_attributes(&fs_locations->fattr); 4688 nfs_fixup_referral_attributes(&fs_locations->fattr);
4623 dprintk("%s: returned status = %d\n", __func__, status); 4689 dprintk("%s: returned status = %d\n", __func__, status);
4624 return status; 4690 return status;
4625} 4691}
4626 4692
4693static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors)
4694{
4695 int status;
4696 struct nfs4_secinfo_arg args = {
4697 .dir_fh = NFS_FH(dir),
4698 .name = name,
4699 };
4700 struct nfs4_secinfo_res res = {
4701 .flavors = flavors,
4702 };
4703 struct rpc_message msg = {
4704 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SECINFO],
4705 .rpc_argp = &args,
4706 .rpc_resp = &res,
4707 };
4708
4709 dprintk("NFS call secinfo %s\n", name->name);
4710 status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0);
4711 dprintk("NFS reply secinfo: %d\n", status);
4712 return status;
4713}
4714
4715int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors)
4716{
4717 struct nfs4_exception exception = { };
4718 int err;
4719 do {
4720 err = nfs4_handle_exception(NFS_SERVER(dir),
4721 _nfs4_proc_secinfo(dir, name, flavors),
4722 &exception);
4723 } while (exception.retry);
4724 return err;
4725}
4726
4627#ifdef CONFIG_NFS_V4_1 4727#ifdef CONFIG_NFS_V4_1
4628/* 4728/*
4629 * Check the exchange flags returned by the server for invalid flags, having 4729 * Check the exchange flags returned by the server for invalid flags, having
@@ -5516,8 +5616,6 @@ static void nfs4_layoutget_release(void *calldata)
5516 struct nfs4_layoutget *lgp = calldata; 5616 struct nfs4_layoutget *lgp = calldata;
5517 5617
5518 dprintk("--> %s\n", __func__); 5618 dprintk("--> %s\n", __func__);
5519 if (lgp->res.layout.buf != NULL)
5520 free_page((unsigned long) lgp->res.layout.buf);
5521 put_nfs_open_context(lgp->args.ctx); 5619 put_nfs_open_context(lgp->args.ctx);
5522 kfree(calldata); 5620 kfree(calldata);
5523 dprintk("<-- %s\n", __func__); 5621 dprintk("<-- %s\n", __func__);
@@ -5549,12 +5647,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
5549 5647
5550 dprintk("--> %s\n", __func__); 5648 dprintk("--> %s\n", __func__);
5551 5649
5552 lgp->res.layout.buf = (void *)__get_free_page(GFP_NOFS); 5650 lgp->res.layoutp = &lgp->args.layout;
5553 if (lgp->res.layout.buf == NULL) {
5554 nfs4_layoutget_release(lgp);
5555 return -ENOMEM;
5556 }
5557
5558 lgp->res.seq_res.sr_slot = NULL; 5651 lgp->res.seq_res.sr_slot = NULL;
5559 task = rpc_run_task(&task_setup_data); 5652 task = rpc_run_task(&task_setup_data);
5560 if (IS_ERR(task)) 5653 if (IS_ERR(task))
@@ -5586,7 +5679,7 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
5586 int status; 5679 int status;
5587 5680
5588 dprintk("--> %s\n", __func__); 5681 dprintk("--> %s\n", __func__);
5589 status = nfs4_call_sync(server, &msg, &args, &res, 0); 5682 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
5590 dprintk("<-- %s status=%d\n", __func__, status); 5683 dprintk("<-- %s status=%d\n", __func__, status);
5591 5684
5592 return status; 5685 return status;
@@ -5606,6 +5699,100 @@ int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
5606} 5699}
5607EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo); 5700EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo);
5608 5701
5702static void nfs4_layoutcommit_prepare(struct rpc_task *task, void *calldata)
5703{
5704 struct nfs4_layoutcommit_data *data = calldata;
5705 struct nfs_server *server = NFS_SERVER(data->args.inode);
5706
5707 if (nfs4_setup_sequence(server, &data->args.seq_args,
5708 &data->res.seq_res, 1, task))
5709 return;
5710 rpc_call_start(task);
5711}
5712
5713static void
5714nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
5715{
5716 struct nfs4_layoutcommit_data *data = calldata;
5717 struct nfs_server *server = NFS_SERVER(data->args.inode);
5718
5719 if (!nfs4_sequence_done(task, &data->res.seq_res))
5720 return;
5721
5722 switch (task->tk_status) { /* Just ignore these failures */
5723 case -NFS4ERR_DELEG_REVOKED: /* layout was recalled */
5724 case -NFS4ERR_BADIOMODE: /* no IOMODE_RW layout for range */
5725 case -NFS4ERR_BADLAYOUT: /* no layout */
5726 case -NFS4ERR_GRACE: /* loca_reclaim always false */
5727 task->tk_status = 0;
5728 }
5729
5730 if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
5731 nfs_restart_rpc(task, server->nfs_client);
5732 return;
5733 }
5734
5735 if (task->tk_status == 0)
5736 nfs_post_op_update_inode_force_wcc(data->args.inode,
5737 data->res.fattr);
5738}
5739
5740static void nfs4_layoutcommit_release(void *calldata)
5741{
5742 struct nfs4_layoutcommit_data *data = calldata;
5743
5744 /* Matched by references in pnfs_set_layoutcommit */
5745 put_lseg(data->lseg);
5746 put_rpccred(data->cred);
5747 kfree(data);
5748}
5749
5750static const struct rpc_call_ops nfs4_layoutcommit_ops = {
5751 .rpc_call_prepare = nfs4_layoutcommit_prepare,
5752 .rpc_call_done = nfs4_layoutcommit_done,
5753 .rpc_release = nfs4_layoutcommit_release,
5754};
5755
5756int
5757nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
5758{
5759 struct rpc_message msg = {
5760 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTCOMMIT],
5761 .rpc_argp = &data->args,
5762 .rpc_resp = &data->res,
5763 .rpc_cred = data->cred,
5764 };
5765 struct rpc_task_setup task_setup_data = {
5766 .task = &data->task,
5767 .rpc_client = NFS_CLIENT(data->args.inode),
5768 .rpc_message = &msg,
5769 .callback_ops = &nfs4_layoutcommit_ops,
5770 .callback_data = data,
5771 .flags = RPC_TASK_ASYNC,
5772 };
5773 struct rpc_task *task;
5774 int status = 0;
5775
5776 dprintk("NFS: %4d initiating layoutcommit call. sync %d "
5777 "lbw: %llu inode %lu\n",
5778 data->task.tk_pid, sync,
5779 data->args.lastbytewritten,
5780 data->args.inode->i_ino);
5781
5782 task = rpc_run_task(&task_setup_data);
5783 if (IS_ERR(task))
5784 return PTR_ERR(task);
5785 if (!sync)
5786 goto out;
5787 status = nfs4_wait_for_completion_rpc_task(task);
5788 if (status != 0)
5789 goto out;
5790 status = task->tk_status;
5791out:
5792 dprintk("%s: status %d\n", __func__, status);
5793 rpc_put_task(task);
5794 return status;
5795}
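
The function always launches the task asynchronously and blocks only when the caller asked for a synchronous LAYOUTCOMMIT, so release runs through rpc_put_task() on every path. A stub sketch of that control flow; the task plumbing is reduced to plain calls and the names are ours:

#include <stdbool.h>
#include <stdio.h>

struct task { int tk_status; };

static struct task *run_task(struct task *t) { t->tk_status = 0; return t; }
static int wait_for_completion(struct task *t) { (void)t; return 0; }
static void put_task(struct task *t) { (void)t; /* release fires here */ }

static int do_layoutcommit(struct task *t, bool sync)
{
	int status = 0;

	run_task(t);
	if (!sync)
		goto out;             /* fire and forget */
	status = wait_for_completion(t);
	if (status != 0)
		goto out;             /* interrupted: don't read tk_status */
	status = t->tk_status;
out:
	put_task(t);
	return status;
}

int main(void)
{
	struct task t;

	printf("async: %d\n", do_layoutcommit(&t, false));
	printf("sync:  %d\n", do_layoutcommit(&t, true));
	return 0;
}
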
5609#endif /* CONFIG_NFS_V4_1 */ 5796#endif /* CONFIG_NFS_V4_1 */
5610 5797
5611struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { 5798struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
@@ -5741,6 +5928,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
5741 .close_context = nfs4_close_context, 5928 .close_context = nfs4_close_context,
5742 .open_context = nfs4_atomic_open, 5929 .open_context = nfs4_atomic_open,
5743 .init_client = nfs4_init_client, 5930 .init_client = nfs4_init_client,
5931 .secinfo = nfs4_proc_secinfo,
5744}; 5932};
5745 5933
5746static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { 5934static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index ab1bf5bb021f..a6804f704d9d 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -590,7 +590,8 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner)
590 state->owner = owner; 590 state->owner = owner;
591 atomic_inc(&owner->so_count); 591 atomic_inc(&owner->so_count);
592 list_add(&state->inode_states, &nfsi->open_states); 592 list_add(&state->inode_states, &nfsi->open_states);
593 state->inode = igrab(inode); 593 ihold(inode);
594 state->inode = inode;
594 spin_unlock(&inode->i_lock); 595 spin_unlock(&inode->i_lock);
595 /* Note: The reclaim code dictates that we add stateless 596 /* Note: The reclaim code dictates that we add stateless
596 * and read-only stateids to the end of the list */ 597 * and read-only stateids to the end of the list */
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 0cf560f77884..dddfb5795d7b 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -46,6 +46,7 @@
46#include <linux/kdev_t.h> 46#include <linux/kdev_t.h>
47#include <linux/sunrpc/clnt.h> 47#include <linux/sunrpc/clnt.h>
48#include <linux/sunrpc/msg_prot.h> 48#include <linux/sunrpc/msg_prot.h>
49#include <linux/sunrpc/gss_api.h>
49#include <linux/nfs.h> 50#include <linux/nfs.h>
50#include <linux/nfs4.h> 51#include <linux/nfs4.h>
51#include <linux/nfs_fs.h> 52#include <linux/nfs_fs.h>
@@ -112,7 +113,7 @@ static int nfs4_stat_to_errno(int);
112#define encode_restorefh_maxsz (op_encode_hdr_maxsz) 113#define encode_restorefh_maxsz (op_encode_hdr_maxsz)
113#define decode_restorefh_maxsz (op_decode_hdr_maxsz) 114#define decode_restorefh_maxsz (op_decode_hdr_maxsz)
114#define encode_fsinfo_maxsz (encode_getattr_maxsz) 115#define encode_fsinfo_maxsz (encode_getattr_maxsz)
115#define decode_fsinfo_maxsz (op_decode_hdr_maxsz + 11) 116#define decode_fsinfo_maxsz (op_decode_hdr_maxsz + 15)
116#define encode_renew_maxsz (op_encode_hdr_maxsz + 3) 117#define encode_renew_maxsz (op_encode_hdr_maxsz + 3)
117#define decode_renew_maxsz (op_decode_hdr_maxsz) 118#define decode_renew_maxsz (op_decode_hdr_maxsz)
118#define encode_setclientid_maxsz \ 119#define encode_setclientid_maxsz \
@@ -253,6 +254,8 @@ static int nfs4_stat_to_errno(int);
253 (encode_getattr_maxsz) 254 (encode_getattr_maxsz)
254#define decode_fs_locations_maxsz \ 255#define decode_fs_locations_maxsz \
255 (0) 256 (0)
257#define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
258#define decode_secinfo_maxsz (op_decode_hdr_maxsz + 4 + (NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)))
256 259
257#if defined(CONFIG_NFS_V4_1) 260#if defined(CONFIG_NFS_V4_1)
258#define NFS4_MAX_MACHINE_NAME_LEN (64) 261#define NFS4_MAX_MACHINE_NAME_LEN (64)
@@ -324,6 +327,18 @@ static int nfs4_stat_to_errno(int);
324#define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \ 327#define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \
325 decode_stateid_maxsz + \ 328 decode_stateid_maxsz + \
326 XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE)) 329 XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
330#define encode_layoutcommit_maxsz (op_encode_hdr_maxsz + \
331 2 /* offset */ + \
332 2 /* length */ + \
333 1 /* reclaim */ + \
334 encode_stateid_maxsz + \
335 1 /* new offset (true) */ + \
336 2 /* last byte written */ + \
337 1 /* nt_timechanged (false) */ + \
338 1 /* layoutupdate4 layout type */ + \
339 1 /* NULL filelayout layoutupdate4 payload */)
340#define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3)
341
327#else /* CONFIG_NFS_V4_1 */ 342#else /* CONFIG_NFS_V4_1 */
328#define encode_sequence_maxsz 0 343#define encode_sequence_maxsz 0
329#define decode_sequence_maxsz 0 344#define decode_sequence_maxsz 0
@@ -676,6 +691,14 @@ static int nfs4_stat_to_errno(int);
676 decode_putfh_maxsz + \ 691 decode_putfh_maxsz + \
677 decode_lookup_maxsz + \ 692 decode_lookup_maxsz + \
678 decode_fs_locations_maxsz) 693 decode_fs_locations_maxsz)
694#define NFS4_enc_secinfo_sz (compound_encode_hdr_maxsz + \
695 encode_sequence_maxsz + \
696 encode_putfh_maxsz + \
697 encode_secinfo_maxsz)
698#define NFS4_dec_secinfo_sz (compound_decode_hdr_maxsz + \
699 decode_sequence_maxsz + \
700 decode_putfh_maxsz + \
701 decode_secinfo_maxsz)
679#if defined(CONFIG_NFS_V4_1) 702#if defined(CONFIG_NFS_V4_1)
680#define NFS4_enc_exchange_id_sz \ 703#define NFS4_enc_exchange_id_sz \
681 (compound_encode_hdr_maxsz + \ 704 (compound_encode_hdr_maxsz + \
@@ -727,6 +750,17 @@ static int nfs4_stat_to_errno(int);
727 decode_sequence_maxsz + \ 750 decode_sequence_maxsz + \
728 decode_putfh_maxsz + \ 751 decode_putfh_maxsz + \
729 decode_layoutget_maxsz) 752 decode_layoutget_maxsz)
753#define NFS4_enc_layoutcommit_sz (compound_encode_hdr_maxsz + \
754 encode_sequence_maxsz +\
755 encode_putfh_maxsz + \
756 encode_layoutcommit_maxsz + \
757 encode_getattr_maxsz)
758#define NFS4_dec_layoutcommit_sz (compound_decode_hdr_maxsz + \
759 decode_sequence_maxsz + \
760 decode_putfh_maxsz + \
761 decode_layoutcommit_maxsz + \
762 decode_getattr_maxsz)
763
730 764
731const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + 765const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
732 compound_encode_hdr_maxsz + 766 compound_encode_hdr_maxsz +
@@ -1620,6 +1654,18 @@ static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *state
1620 hdr->replen += decode_delegreturn_maxsz; 1654 hdr->replen += decode_delegreturn_maxsz;
1621} 1655}
1622 1656
1657static void encode_secinfo(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
1658{
1659 int len = name->len;
1660 __be32 *p;
1661
1662 p = reserve_space(xdr, 8 + len);
1663 *p++ = cpu_to_be32(OP_SECINFO);
1664 xdr_encode_opaque(p, name->name, len);
1665 hdr->nops++;
1666 hdr->replen += decode_secinfo_maxsz;
1667}
1668
1623#if defined(CONFIG_NFS_V4_1) 1669#if defined(CONFIG_NFS_V4_1)
1624/* NFSv4.1 operations */ 1670/* NFSv4.1 operations */
1625static void encode_exchange_id(struct xdr_stream *xdr, 1671static void encode_exchange_id(struct xdr_stream *xdr,
@@ -1816,6 +1862,34 @@ encode_layoutget(struct xdr_stream *xdr,
1816 hdr->nops++; 1862 hdr->nops++;
1817 hdr->replen += decode_layoutget_maxsz; 1863 hdr->replen += decode_layoutget_maxsz;
1818} 1864}
1865
1866static int
1867encode_layoutcommit(struct xdr_stream *xdr,
1868 const struct nfs4_layoutcommit_args *args,
1869 struct compound_hdr *hdr)
1870{
1871 __be32 *p;
1872
1873 dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten,
1874 NFS_SERVER(args->inode)->pnfs_curr_ld->id);
1875
1876 p = reserve_space(xdr, 48 + NFS4_STATEID_SIZE);
1877 *p++ = cpu_to_be32(OP_LAYOUTCOMMIT);
1878 /* Only whole file layouts */
1879 p = xdr_encode_hyper(p, 0); /* offset */
1880 p = xdr_encode_hyper(p, NFS4_MAX_UINT64); /* length */
1881 *p++ = cpu_to_be32(0); /* reclaim */
1882 p = xdr_encode_opaque_fixed(p, args->stateid.data, NFS4_STATEID_SIZE);
1883 *p++ = cpu_to_be32(1); /* newoffset = TRUE */
1884 p = xdr_encode_hyper(p, args->lastbytewritten);
1885 *p++ = cpu_to_be32(0); /* Never send time_modify_changed */
1886 *p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */
1887 *p++ = cpu_to_be32(0); /* no file layout payload */
1888
1889 hdr->nops++;
1890 hdr->replen += decode_layoutcommit_maxsz;
1891 return 0;
1892}
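
The reserve_space(xdr, 48 + NFS4_STATEID_SIZE) above budgets 16 bytes for the stateid plus 48 for the fixed-width fields that follow. A compile-time check of that arithmetic; the field names are descriptive only:

/* XDR is 4-byte aligned; hypers take two 4-byte words. */
enum {
	OPCODE      = 4,
	LOCA_OFFSET = 8,
	LOCA_LENGTH = 8,
	RECLAIM     = 4,
	NEWOFFSET   = 4,
	LAST_BYTE   = 8,
	TIMECHANGED = 4,
	LAYOUT_TYPE = 4,
	PAYLOAD_LEN = 4,
};

_Static_assert(OPCODE + LOCA_OFFSET + LOCA_LENGTH + RECLAIM + NEWOFFSET +
	       LAST_BYTE + TIMECHANGED + LAYOUT_TYPE + PAYLOAD_LEN == 48,
	       "fixed-width LAYOUTCOMMIT fields must total 48 bytes");

int main(void) { return 0; }
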
1819#endif /* CONFIG_NFS_V4_1 */ 1893#endif /* CONFIG_NFS_V4_1 */
1820 1894
1821/* 1895/*
@@ -2294,7 +2368,8 @@ static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
2294 encode_sequence(xdr, &args->seq_args, &hdr); 2368 encode_sequence(xdr, &args->seq_args, &hdr);
2295 encode_putfh(xdr, args->fh, &hdr); 2369 encode_putfh(xdr, args->fh, &hdr);
2296 encode_commit(xdr, args, &hdr); 2370 encode_commit(xdr, args, &hdr);
2297 encode_getfattr(xdr, args->bitmask, &hdr); 2371 if (args->bitmask)
2372 encode_getfattr(xdr, args->bitmask, &hdr);
2298 encode_nops(&hdr); 2373 encode_nops(&hdr);
2299} 2374}
2300 2375
@@ -2465,6 +2540,24 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req,
2465 encode_nops(&hdr); 2540 encode_nops(&hdr);
2466} 2541}
2467 2542
2543/*
2544 * Encode SECINFO request
2545 */
2546static void nfs4_xdr_enc_secinfo(struct rpc_rqst *req,
2547 struct xdr_stream *xdr,
2548 struct nfs4_secinfo_arg *args)
2549{
2550 struct compound_hdr hdr = {
2551 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2552 };
2553
2554 encode_compound_hdr(xdr, req, &hdr);
2555 encode_sequence(xdr, &args->seq_args, &hdr);
2556 encode_putfh(xdr, args->dir_fh, &hdr);
2557 encode_secinfo(xdr, args->name, &hdr);
2558 encode_nops(&hdr);
2559}
2560
2468#if defined(CONFIG_NFS_V4_1) 2561#if defined(CONFIG_NFS_V4_1)
2469/* 2562/*
2470 * EXCHANGE_ID request 2563 * EXCHANGE_ID request
@@ -2604,8 +2697,32 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req,
2604 encode_sequence(xdr, &args->seq_args, &hdr); 2697 encode_sequence(xdr, &args->seq_args, &hdr);
2605 encode_putfh(xdr, NFS_FH(args->inode), &hdr); 2698 encode_putfh(xdr, NFS_FH(args->inode), &hdr);
2606 encode_layoutget(xdr, args, &hdr); 2699 encode_layoutget(xdr, args, &hdr);
2700
2701 xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
2702 args->layout.pages, 0, args->layout.pglen);
2703
2607 encode_nops(&hdr); 2704 encode_nops(&hdr);
2608} 2705}
2706
2707/*
2708 * Encode LAYOUTCOMMIT request
2709 */
2710static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
2711 struct xdr_stream *xdr,
2712 struct nfs4_layoutcommit_args *args)
2713{
2714 struct compound_hdr hdr = {
2715 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2716 };
2717
2718 encode_compound_hdr(xdr, req, &hdr);
2719 encode_sequence(xdr, &args->seq_args, &hdr);
2720 encode_putfh(xdr, NFS_FH(args->inode), &hdr);
2721 encode_layoutcommit(xdr, args, &hdr);
2722 encode_getfattr(xdr, args->bitmask, &hdr);
2723 encode_nops(&hdr);
2724 return 0;
2725}
2609#endif /* CONFIG_NFS_V4_1 */ 2726#endif /* CONFIG_NFS_V4_1 */
2610 2727
2611static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) 2728static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
@@ -2925,6 +3042,7 @@ static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap)
2925 if (unlikely(!p)) 3042 if (unlikely(!p))
2926 goto out_overflow; 3043 goto out_overflow;
2927 bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR; 3044 bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR;
3045 return -be32_to_cpup(p);
2928 } 3046 }
2929 return 0; 3047 return 0;
2930out_overflow: 3048out_overflow:
@@ -3912,6 +4030,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
3912 fattr->valid |= status; 4030 fattr->valid |= status;
3913 4031
3914 status = decode_attr_error(xdr, bitmap); 4032 status = decode_attr_error(xdr, bitmap);
4033 if (status == -NFS4ERR_WRONGSEC) {
4034 nfs_fixup_secinfo_attributes(fattr, fh);
4035 status = 0;
4036 }
3915 if (status < 0) 4037 if (status < 0)
3916 goto xdr_error; 4038 goto xdr_error;
3917 4039
@@ -4680,6 +4802,73 @@ static int decode_delegreturn(struct xdr_stream *xdr)
4680 return decode_op_hdr(xdr, OP_DELEGRETURN); 4802 return decode_op_hdr(xdr, OP_DELEGRETURN);
4681} 4803}
4682 4804
4805static int decode_secinfo_gss(struct xdr_stream *xdr, struct nfs4_secinfo_flavor *flavor)
4806{
4807 __be32 *p;
4808
4809 p = xdr_inline_decode(xdr, 4);
4810 if (unlikely(!p))
4811 goto out_overflow;
4812 flavor->gss.sec_oid4.len = be32_to_cpup(p);
4813 if (flavor->gss.sec_oid4.len > GSS_OID_MAX_LEN)
4814 goto out_err;
4815
4816 p = xdr_inline_decode(xdr, flavor->gss.sec_oid4.len);
4817 if (unlikely(!p))
4818 goto out_overflow;
4819 memcpy(flavor->gss.sec_oid4.data, p, flavor->gss.sec_oid4.len);
4820
4821 p = xdr_inline_decode(xdr, 8);
4822 if (unlikely(!p))
4823 goto out_overflow;
4824 flavor->gss.qop4 = be32_to_cpup(p++);
4825 flavor->gss.service = be32_to_cpup(p);
4826
4827 return 0;
4828
4829out_overflow:
4830 print_overflow_msg(__func__, xdr);
4831 return -EIO;
4832out_err:
4833 return -EINVAL;
4834}
4835
4836static int decode_secinfo(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
4837{
4838 struct nfs4_secinfo_flavor *sec_flavor;
4839 int status;
4840 __be32 *p;
4841 int i;
4842
4843 status = decode_op_hdr(xdr, OP_SECINFO);
4844 p = xdr_inline_decode(xdr, 4);
4845 if (unlikely(!p))
4846 goto out_overflow;
4847 res->flavors->num_flavors = be32_to_cpup(p);
4848
4849 for (i = 0; i < res->flavors->num_flavors; i++) {
4850 sec_flavor = &res->flavors->flavors[i];
4851 if ((char *)&sec_flavor[1] - (char *)res->flavors > PAGE_SIZE)
4852 break;
4853
4854 p = xdr_inline_decode(xdr, 4);
4855 if (unlikely(!p))
4856 goto out_overflow;
4857 sec_flavor->flavor = be32_to_cpup(p);
4858
4859 if (sec_flavor->flavor == RPC_AUTH_GSS) {
4860 if (decode_secinfo_gss(xdr, sec_flavor))
4861 break;
4862 }
4863 }
4864
4865 return 0;
4866
4867out_overflow:
4868 print_overflow_msg(__func__, xdr);
4869 return -EIO;
4870}
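
decode_secinfo() walks a SECINFO4resok body: a flavor count, then per entry an RPC flavor number and, for RPC_AUTH_GSS, a length-prefixed OID, a qop, and a service. A standalone sketch of the same walk over a hand-built big-endian buffer; the helper and sample values are ours, and the OID skip shows the 4-byte XDR padding that xdr_inline_decode() handles internally:

#include <stdint.h>
#include <stdio.h>

#define RPC_AUTH_GSS 6

static const uint8_t *rd32(const uint8_t *p, uint32_t *v)
{
	*v = (uint32_t)p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3];
	return p + 4;
}

int main(void)
{
	/* Two flavors: AUTH_SYS (1), then AUTH_GSS with a 2-byte OID,
	 * qop 0, service 1, matching the fields decode_secinfo_gss() reads. */
	static const uint8_t buf[] = {
		0,0,0,2,                 /* num_flavors */
		0,0,0,1,                 /* flavor: AUTH_SYS */
		0,0,0,6,                 /* flavor: RPC_AUTH_GSS */
		0,0,0,2, 0x2a,0x03,0,0,  /* sec_oid4: len 2, data, pad */
		0,0,0,0,                 /* qop4 */
		0,0,0,1,                 /* service */
	};
	const uint8_t *p = buf;
	uint32_t n, flavor, len, qop, service;

	p = rd32(p, &n);
	for (uint32_t i = 0; i < n; i++) {
		p = rd32(p, &flavor);
		if (flavor != RPC_AUTH_GSS) {
			printf("flavor %u\n", (unsigned)flavor);
			continue;
		}
		p = rd32(p, &len);
		p += (len + 3) & ~3u;    /* skip OID bytes, XDR-padded */
		p = rd32(p, &qop);
		p = rd32(p, &service);
		printf("gss oid_len=%u qop=%u service=%u\n",
		       (unsigned)len, (unsigned)qop, (unsigned)service);
	}
	return 0;
}
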
4871
4683#if defined(CONFIG_NFS_V4_1) 4872#if defined(CONFIG_NFS_V4_1)
4684static int decode_exchange_id(struct xdr_stream *xdr, 4873static int decode_exchange_id(struct xdr_stream *xdr,
4685 struct nfs41_exchange_id_res *res) 4874 struct nfs41_exchange_id_res *res)
@@ -4950,6 +5139,9 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
4950 __be32 *p; 5139 __be32 *p;
4951 int status; 5140 int status;
4952 u32 layout_count; 5141 u32 layout_count;
5142 struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
5143 struct kvec *iov = rcvbuf->head;
5144 u32 hdrlen, recvd;
4953 5145
4954 status = decode_op_hdr(xdr, OP_LAYOUTGET); 5146 status = decode_op_hdr(xdr, OP_LAYOUTGET);
4955 if (status) 5147 if (status)
@@ -4966,17 +5158,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
4966 return -EINVAL; 5158 return -EINVAL;
4967 } 5159 }
4968 5160
4969 p = xdr_inline_decode(xdr, 24); 5161 p = xdr_inline_decode(xdr, 28);
4970 if (unlikely(!p)) 5162 if (unlikely(!p))
4971 goto out_overflow; 5163 goto out_overflow;
4972 p = xdr_decode_hyper(p, &res->range.offset); 5164 p = xdr_decode_hyper(p, &res->range.offset);
4973 p = xdr_decode_hyper(p, &res->range.length); 5165 p = xdr_decode_hyper(p, &res->range.length);
4974 res->range.iomode = be32_to_cpup(p++); 5166 res->range.iomode = be32_to_cpup(p++);
4975 res->type = be32_to_cpup(p++); 5167 res->type = be32_to_cpup(p++);
4976 5168 res->layoutp->len = be32_to_cpup(p);
4977 status = decode_opaque_inline(xdr, &res->layout.len, (char **)&p);
4978 if (unlikely(status))
4979 return status;
4980 5169
4981 dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n", 5170 dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n",
4982 __func__, 5171 __func__,
@@ -4984,12 +5173,18 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
4984 (unsigned long)res->range.length, 5173 (unsigned long)res->range.length,
4985 res->range.iomode, 5174 res->range.iomode,
4986 res->type, 5175 res->type,
4987 res->layout.len); 5176 res->layoutp->len);
4988 5177
4989 /* nfs4_proc_layoutget allocated a single page */ 5178 hdrlen = (u8 *) xdr->p - (u8 *) iov->iov_base;
4990 if (res->layout.len > PAGE_SIZE) 5179 recvd = req->rq_rcv_buf.len - hdrlen;
4991 return -ENOMEM; 5180 if (res->layoutp->len > recvd) {
4992 memcpy(res->layout.buf, p, res->layout.len); 5181 dprintk("NFS: server cheating in layoutget reply: "
5182 "layout len %u > recvd %u\n",
5183 res->layoutp->len, recvd);
5184 return -EINVAL;
5185 }
5186
5187 xdr_read_pages(xdr, res->layoutp->len);
4993 5188
4994 if (layout_count > 1) { 5189 if (layout_count > 1) {
4995 /* We only handle a length one array at the moment. Any 5190 /* We only handle a length one array at the moment. Any
@@ -5006,6 +5201,35 @@ out_overflow:
5006 print_overflow_msg(__func__, xdr); 5201 print_overflow_msg(__func__, xdr);
5007 return -EIO; 5202 return -EIO;
5008} 5203}
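
With the private bounce page gone, decode_layoutget() computes how many bytes actually arrived past the parsed header and rejects a declared opaque length that exceeds them before xdr_read_pages() shifts the stream into the pre-posted pages. The test is the usual "check the sender's count against bytes on hand" pattern; a sketch with illustrative numbers:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Reject a sender-declared payload length that exceeds what was received. */
static int check_opaque_len(uint32_t declared, size_t buf_len, size_t hdr_len)
{
	size_t recvd = buf_len - hdr_len;   /* bytes left after the header */

	if (declared > recvd) {
		fprintf(stderr, "declared %u > recvd %zu\n",
			(unsigned)declared, recvd);
		return -1;                  /* -EINVAL in the kernel code */
	}
	return 0;
}

int main(void)
{
	printf("ok:  %d\n", check_opaque_len(400, 1024, 600));  /* 400 <= 424 */
	printf("bad: %d\n", check_opaque_len(4096, 1024, 600)); /* 4096 > 424 */
	return 0;
}
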
5204
5205static int decode_layoutcommit(struct xdr_stream *xdr,
5206 struct rpc_rqst *req,
5207 struct nfs4_layoutcommit_res *res)
5208{
5209 __be32 *p;
5210 __u32 sizechanged;
5211 int status;
5212
5213 status = decode_op_hdr(xdr, OP_LAYOUTCOMMIT);
5214 if (status)
5215 return status;
5216
5217 p = xdr_inline_decode(xdr, 4);
5218 if (unlikely(!p))
5219 goto out_overflow;
5220 sizechanged = be32_to_cpup(p);
5221
5222 if (sizechanged) {
5223 /* throw away new size */
5224 p = xdr_inline_decode(xdr, 8);
5225 if (unlikely(!p))
5226 goto out_overflow;
5227 }
5228 return 0;
5229out_overflow:
5230 print_overflow_msg(__func__, xdr);
5231 return -EIO;
5232}
5009#endif /* CONFIG_NFS_V4_1 */ 5233#endif /* CONFIG_NFS_V4_1 */
5010 5234
5011/* 5235/*
@@ -5723,8 +5947,9 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5723 status = decode_commit(xdr, res); 5947 status = decode_commit(xdr, res);
5724 if (status) 5948 if (status)
5725 goto out; 5949 goto out;
5726 decode_getfattr(xdr, res->fattr, res->server, 5950 if (res->fattr)
5727 !RPC_IS_ASYNC(rqstp->rq_task)); 5951 decode_getfattr(xdr, res->fattr, res->server,
5952 !RPC_IS_ASYNC(rqstp->rq_task));
5728out: 5953out:
5729 return status; 5954 return status;
5730} 5955}
@@ -5919,6 +6144,32 @@ out:
5919 return status; 6144 return status;
5920} 6145}
5921 6146
6147/*
6148 * Decode SECINFO response
6149 */
6150static int nfs4_xdr_dec_secinfo(struct rpc_rqst *rqstp,
6151 struct xdr_stream *xdr,
6152 struct nfs4_secinfo_res *res)
6153{
6154 struct compound_hdr hdr;
6155 int status;
6156
6157 status = decode_compound_hdr(xdr, &hdr);
6158 if (status)
6159 goto out;
6160 status = decode_sequence(xdr, &res->seq_res, rqstp);
6161 if (status)
6162 goto out;
6163 status = decode_putfh(xdr);
6164 if (status)
6165 goto out;
6166 status = decode_secinfo(xdr, res);
6167 if (status)
6168 goto out;
6169out:
6170 return status;
6171}
6172
5922#if defined(CONFIG_NFS_V4_1) 6173#if defined(CONFIG_NFS_V4_1)
5923/* 6174/*
5924 * Decode EXCHANGE_ID response 6175 * Decode EXCHANGE_ID response
@@ -6066,6 +6317,34 @@ static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp,
6066out: 6317out:
6067 return status; 6318 return status;
6068} 6319}
6320
6321/*
6322 * Decode LAYOUTCOMMIT response
6323 */
6324static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp,
6325 struct xdr_stream *xdr,
6326 struct nfs4_layoutcommit_res *res)
6327{
6328 struct compound_hdr hdr;
6329 int status;
6330
6331 status = decode_compound_hdr(xdr, &hdr);
6332 if (status)
6333 goto out;
6334 status = decode_sequence(xdr, &res->seq_res, rqstp);
6335 if (status)
6336 goto out;
6337 status = decode_putfh(xdr);
6338 if (status)
6339 goto out;
6340 status = decode_layoutcommit(xdr, rqstp, res);
6341 if (status)
6342 goto out;
6343 decode_getfattr(xdr, res->fattr, res->server,
6344 !RPC_IS_ASYNC(rqstp->rq_task));
6345out:
6346 return status;
6347}
6069#endif /* CONFIG_NFS_V4_1 */ 6348#endif /* CONFIG_NFS_V4_1 */
6070 6349
6071/** 6350/**
@@ -6180,10 +6459,6 @@ static struct {
6180 { NFS4ERR_SYMLINK, -ELOOP }, 6459 { NFS4ERR_SYMLINK, -ELOOP },
6181 { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP }, 6460 { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP },
6182 { NFS4ERR_DEADLOCK, -EDEADLK }, 6461 { NFS4ERR_DEADLOCK, -EDEADLK },
6183 { NFS4ERR_WRONGSEC, -EPERM }, /* FIXME: this needs
6184 * to be handled by a
6185 * middle-layer.
6186 */
6187 { -1, -EIO } 6462 { -1, -EIO }
6188}; 6463};
6189 6464
@@ -6258,6 +6533,7 @@ struct rpc_procinfo nfs4_procedures[] = {
6258 PROC(SETACL, enc_setacl, dec_setacl), 6533 PROC(SETACL, enc_setacl, dec_setacl),
6259 PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations), 6534 PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations),
6260 PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner), 6535 PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner),
6536 PROC(SECINFO, enc_secinfo, dec_secinfo),
6261#if defined(CONFIG_NFS_V4_1) 6537#if defined(CONFIG_NFS_V4_1)
6262 PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id), 6538 PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id),
6263 PROC(CREATE_SESSION, enc_create_session, dec_create_session), 6539 PROC(CREATE_SESSION, enc_create_session, dec_create_session),
@@ -6267,6 +6543,7 @@ struct rpc_procinfo nfs4_procedures[] = {
6267 PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), 6543 PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete),
6268 PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), 6544 PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
6269 PROC(LAYOUTGET, enc_layoutget, dec_layoutget), 6545 PROC(LAYOUTGET, enc_layoutget, dec_layoutget),
6546 PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit),
6270#endif /* CONFIG_NFS_V4_1 */ 6547#endif /* CONFIG_NFS_V4_1 */
6271}; 6548};
6272 6549
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 23e794410669..c80add6e2213 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -135,14 +135,14 @@ void nfs_clear_page_tag_locked(struct nfs_page *req)
135 nfs_unlock_request(req); 135 nfs_unlock_request(req);
136} 136}
137 137
138/** 138/*
139 * nfs_clear_request - Free up all resources allocated to the request 139 * nfs_clear_request - Free up all resources allocated to the request
140 * @req: 140 * @req:
141 * 141 *
142 * Release page and open context resources associated with a read/write 142 * Release page and open context resources associated with a read/write
143 * request after it has completed. 143 * request after it has completed.
144 */ 144 */
145void nfs_clear_request(struct nfs_page *req) 145static void nfs_clear_request(struct nfs_page *req)
146{ 146{
147 struct page *page = req->wb_page; 147 struct page *page = req->wb_page;
148 struct nfs_open_context *ctx = req->wb_context; 148 struct nfs_open_context *ctx = req->wb_context;
@@ -223,6 +223,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
223 desc->pg_count = 0; 223 desc->pg_count = 0;
224 desc->pg_bsize = bsize; 224 desc->pg_bsize = bsize;
225 desc->pg_base = 0; 225 desc->pg_base = 0;
226 desc->pg_moreio = 0;
226 desc->pg_inode = inode; 227 desc->pg_inode = inode;
227 desc->pg_doio = doio; 228 desc->pg_doio = doio;
228 desc->pg_ioflags = io_flags; 229 desc->pg_ioflags = io_flags;
@@ -335,9 +336,11 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
335 struct nfs_page *req) 336 struct nfs_page *req)
336{ 337{
337 while (!nfs_pageio_do_add_request(desc, req)) { 338 while (!nfs_pageio_do_add_request(desc, req)) {
339 desc->pg_moreio = 1;
338 nfs_pageio_doio(desc); 340 nfs_pageio_doio(desc);
339 if (desc->pg_error < 0) 341 if (desc->pg_error < 0)
340 return 0; 342 return 0;
343 desc->pg_moreio = 0;
341 } 344 }
342 return 1; 345 return 1;
343} 346}
@@ -395,6 +398,7 @@ int nfs_scan_list(struct nfs_inode *nfsi,
395 pgoff_t idx_end; 398 pgoff_t idx_end;
396 int found, i; 399 int found, i;
397 int res; 400 int res;
401 struct list_head *list;
398 402
399 res = 0; 403 res = 0;
400 if (npages == 0) 404 if (npages == 0)
@@ -415,10 +419,10 @@ int nfs_scan_list(struct nfs_inode *nfsi,
415 idx_start = req->wb_index + 1; 419 idx_start = req->wb_index + 1;
416 if (nfs_set_page_tag_locked(req)) { 420 if (nfs_set_page_tag_locked(req)) {
417 kref_get(&req->wb_kref); 421 kref_get(&req->wb_kref);
418 nfs_list_remove_request(req);
419 radix_tree_tag_clear(&nfsi->nfs_page_tree, 422 radix_tree_tag_clear(&nfsi->nfs_page_tree,
420 req->wb_index, tag); 423 req->wb_index, tag);
421 nfs_list_add_request(req, dst); 424 list = pnfs_choose_commit_list(req, dst);
425 nfs_list_add_request(req, list);
422 res++; 426 res++;
423 if (res == INT_MAX) 427 if (res == INT_MAX)
424 goto out; 428 goto out;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index f38813a0a295..d9ab97269ce6 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -259,6 +259,7 @@ put_lseg(struct pnfs_layout_segment *lseg)
259 pnfs_free_lseg_list(&free_me); 259 pnfs_free_lseg_list(&free_me);
260 } 260 }
261} 261}
262EXPORT_SYMBOL_GPL(put_lseg);
262 263
263static bool 264static bool
264should_free_lseg(u32 lseg_iomode, u32 recall_iomode) 265should_free_lseg(u32 lseg_iomode, u32 recall_iomode)
@@ -471,6 +472,9 @@ send_layoutget(struct pnfs_layout_hdr *lo,
471 struct nfs_server *server = NFS_SERVER(ino); 472 struct nfs_server *server = NFS_SERVER(ino);
472 struct nfs4_layoutget *lgp; 473 struct nfs4_layoutget *lgp;
473 struct pnfs_layout_segment *lseg = NULL; 474 struct pnfs_layout_segment *lseg = NULL;
475 struct page **pages = NULL;
476 int i;
477 u32 max_resp_sz, max_pages;
474 478
475 dprintk("--> %s\n", __func__); 479 dprintk("--> %s\n", __func__);
476 480
@@ -478,6 +482,21 @@ send_layoutget(struct pnfs_layout_hdr *lo,
478 lgp = kzalloc(sizeof(*lgp), GFP_KERNEL); 482 lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
479 if (lgp == NULL) 483 if (lgp == NULL)
480 return NULL; 484 return NULL;
485
486 /* allocate pages for xdr post processing */
487 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
488 max_pages = max_resp_sz >> PAGE_SHIFT;
489
490 pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
491 if (!pages)
492 goto out_err_free;
493
494 for (i = 0; i < max_pages; i++) {
495 pages[i] = alloc_page(GFP_KERNEL);
496 if (!pages[i])
497 goto out_err_free;
498 }
499
481 lgp->args.minlength = NFS4_MAX_UINT64; 500 lgp->args.minlength = NFS4_MAX_UINT64;
482 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; 501 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
483 lgp->args.range.iomode = iomode; 502 lgp->args.range.iomode = iomode;
@@ -486,6 +505,8 @@ send_layoutget(struct pnfs_layout_hdr *lo,
486 lgp->args.type = server->pnfs_curr_ld->id; 505 lgp->args.type = server->pnfs_curr_ld->id;
487 lgp->args.inode = ino; 506 lgp->args.inode = ino;
488 lgp->args.ctx = get_nfs_open_context(ctx); 507 lgp->args.ctx = get_nfs_open_context(ctx);
508 lgp->args.layout.pages = pages;
509 lgp->args.layout.pglen = max_pages * PAGE_SIZE;
489 lgp->lsegpp = &lseg; 510 lgp->lsegpp = &lseg;
490 511
491 /* Synchronously retrieve layout information from server and 512 /* Synchronously retrieve layout information from server and
@@ -496,7 +517,26 @@ send_layoutget(struct pnfs_layout_hdr *lo,
496 /* remember that LAYOUTGET failed and suspend trying */ 517 /* remember that LAYOUTGET failed and suspend trying */
497 set_bit(lo_fail_bit(iomode), &lo->plh_flags); 518 set_bit(lo_fail_bit(iomode), &lo->plh_flags);
498 } 519 }
520
521 /* free xdr pages */
522 for (i = 0; i < max_pages; i++)
523 __free_page(pages[i]);
524 kfree(pages);
525
499 return lseg; 526 return lseg;
527
528out_err_free:
529 /* free any allocated xdr pages, lgp as it's not used */
530 if (pages) {
531 for (i = 0; i < max_pages; i++) {
532 if (!pages[i])
533 break;
534 __free_page(pages[i]);
535 }
536 kfree(pages);
537 }
538 kfree(lgp);
539 return NULL;
500} 540}
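
send_layoutget() now sizes a page array from the session's max_resp_sz and must unwind on partial failure; because the array starts zeroed, the error path can free until the first NULL slot, which is exactly where alloc_page() stopped. A userspace sketch of the same unwind, with malloc/free standing in for the page allocator:

#include <stdlib.h>

static void **alloc_array(size_t n)
{
	/* calloc zeroes the slots, so the unwind can stop at the first NULL */
	void **pages = calloc(n, sizeof(*pages));
	size_t i;

	if (!pages)
		return NULL;
	for (i = 0; i < n; i++) {
		pages[i] = malloc(4096);
		if (!pages[i])
			goto out_err_free;
	}
	return pages;

out_err_free:
	for (i = 0; pages[i] != NULL; i++)   /* frees only what succeeded */
		free(pages[i]);
	free(pages);
	return NULL;
}

int main(void)
{
	void **pages = alloc_array(16);

	if (!pages)
		return 1;
	/* ... hand pages to the request, then release them ... */
	for (size_t i = 0; i < 16; i++)
		free(pages[i]);
	free(pages);
	return 0;
}
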
501 541
502bool pnfs_roc(struct inode *ino) 542bool pnfs_roc(struct inode *ino)
@@ -945,3 +985,105 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
945 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 985 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
946 return trypnfs; 986 return trypnfs;
947} 987}
988
989/*
990 * Currently there is only one (whole file) write lseg.
991 */
992static struct pnfs_layout_segment *pnfs_list_write_lseg(struct inode *inode)
993{
994 struct pnfs_layout_segment *lseg, *rv = NULL;
995
996 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
997 if (lseg->pls_range.iomode == IOMODE_RW)
998 rv = lseg;
999 return rv;
1000}
1001
1002void
1003pnfs_set_layoutcommit(struct nfs_write_data *wdata)
1004{
1005 struct nfs_inode *nfsi = NFS_I(wdata->inode);
1006 loff_t end_pos = wdata->args.offset + wdata->res.count;
1007
1008 spin_lock(&nfsi->vfs_inode.i_lock);
1009 if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
1010 /* references matched in nfs4_layoutcommit_release */
1011 get_lseg(wdata->lseg);
1012 wdata->lseg->pls_lc_cred =
1013 get_rpccred(wdata->args.context->state->owner->so_cred);
1014 mark_inode_dirty_sync(wdata->inode);
1015 dprintk("%s: Set layoutcommit for inode %lu ",
1016 __func__, wdata->inode->i_ino);
1017 }
1018 if (end_pos > wdata->lseg->pls_end_pos)
1019 wdata->lseg->pls_end_pos = end_pos;
1020 spin_unlock(&nfsi->vfs_inode.i_lock);
1021}
1022EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
1023
1024/*
1025 * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and
1026 * NFS_UNSTABLE WRITEs followed by a COMMIT to the data servers must
1027 * write enough data to disk for the server to recover it after a crash.
1028 * LAYOUTCOMMIT is needed only when the NFL4_UFLG_COMMIT_THRU_MDS flag
1029 * is off and either a COMMIT is sent to a data server or WRITEs to a
1030 * data server return NFS_DATA_SYNC.
1031 */
1032int
1033pnfs_layoutcommit_inode(struct inode *inode, bool sync)
1034{
1035 struct nfs4_layoutcommit_data *data;
1036 struct nfs_inode *nfsi = NFS_I(inode);
1037 struct pnfs_layout_segment *lseg;
1038 struct rpc_cred *cred;
1039 loff_t end_pos;
1040 int status = 0;
1041
1042 dprintk("--> %s inode %lu\n", __func__, inode->i_ino);
1043
1044 if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
1045 return 0;
1046
1047 /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
1048 data = kzalloc(sizeof(*data), GFP_NOFS);
1049 if (!data) {
1050 mark_inode_dirty_sync(inode);
1051 status = -ENOMEM;
1052 goto out;
1053 }
1054
1055 spin_lock(&inode->i_lock);
1056 if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
1057 spin_unlock(&inode->i_lock);
1058 kfree(data);
1059 goto out;
1060 }
1061 /*
1062 * Currently there is only one (whole-file) write lseg; it was
1063 * referenced in pnfs_set_layoutcommit and will be found here.
1064 */
1065 lseg = pnfs_list_write_lseg(inode);
1066
1067 end_pos = lseg->pls_end_pos;
1068 cred = lseg->pls_lc_cred;
1069 lseg->pls_end_pos = 0;
1070 lseg->pls_lc_cred = NULL;
1071
1072 memcpy(&data->args.stateid.data, nfsi->layout->plh_stateid.data,
1073 sizeof(nfsi->layout->plh_stateid.data));
1074 spin_unlock(&inode->i_lock);
1075
1076 data->args.inode = inode;
1077 data->lseg = lseg;
1078 data->cred = cred;
1079 nfs_fattr_init(&data->fattr);
1080 data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
1081 data->res.fattr = &data->fattr;
1082 data->args.lastbytewritten = end_pos - 1;
1083 data->res.server = NFS_SERVER(inode);
1084
1085 status = nfs4_proc_layoutcommit(data, sync);
1086out:
1087 dprintk("<-- %s status %d\n", __func__, status);
1088 return status;
1089}
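
pnfs_layoutcommit_inode() checks NFS_INO_LAYOUTCOMMIT twice: an unlocked early-out before allocating, then test_and_clear_bit() under i_lock so exactly one caller consumes the flag and the lseg/cred it guards. A sketch of the claim idiom with C11 atomics; the spinlock is elided here because the atomic exchange alone serializes the claim:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool layoutcommit_needed;

/* Returns true for exactly one caller per set; others bail out early. */
static bool claim(void)
{
	if (!atomic_load(&layoutcommit_needed))
		return false;                /* cheap unlocked early-out */
	/* RMW: only one thread observes true and clears the flag */
	return atomic_exchange(&layoutcommit_needed, false);
}

int main(void)
{
	atomic_store(&layoutcommit_needed, true);
	printf("first:  %d\n", claim());   /* 1: this caller commits */
	printf("second: %d\n", claim());   /* 0: flag already consumed */
	return 0;
}
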
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 6380b9405bcd..bc4827202e7a 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -43,6 +43,8 @@ struct pnfs_layout_segment {
43 atomic_t pls_refcount; 43 atomic_t pls_refcount;
44 unsigned long pls_flags; 44 unsigned long pls_flags;
45 struct pnfs_layout_hdr *pls_layout; 45 struct pnfs_layout_hdr *pls_layout;
46 struct rpc_cred *pls_lc_cred; /* LAYOUTCOMMIT credential */
47 loff_t pls_end_pos; /* LAYOUTCOMMIT write end */
46}; 48};
47 49
48enum pnfs_try_status { 50enum pnfs_try_status {
@@ -74,6 +76,13 @@ struct pnfs_layoutdriver_type {
74 /* test for nfs page cache coalescing */ 76 /* test for nfs page cache coalescing */
75 int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); 77 int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
76 78
79 /* Returns true if the layout driver wants to divert this request
80 * to the driver's commit routine.
81 */
82 bool (*mark_pnfs_commit)(struct pnfs_layout_segment *lseg);
83 struct list_head * (*choose_commit_list) (struct nfs_page *req);
84 int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how);
85
77 /* 86 /*
78 * Return PNFS_ATTEMPTED to indicate the layout code has attempted 87 * Return PNFS_ATTEMPTED to indicate the layout code has attempted
79 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS 88 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
@@ -100,7 +109,6 @@ struct pnfs_device {
100 unsigned int layout_type; 109 unsigned int layout_type;
101 unsigned int mincount; 110 unsigned int mincount;
102 struct page **pages; 111 struct page **pages;
103 void *area;
104 unsigned int pgbase; 112 unsigned int pgbase;
105 unsigned int pglen; 113 unsigned int pglen;
106}; 114};
@@ -145,7 +153,8 @@ bool pnfs_roc(struct inode *ino);
145void pnfs_roc_release(struct inode *ino); 153void pnfs_roc_release(struct inode *ino);
146void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); 154void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
147bool pnfs_roc_drain(struct inode *ino, u32 *barrier); 155bool pnfs_roc_drain(struct inode *ino, u32 *barrier);
148 156void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
157int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
149 158
150static inline int lo_fail_bit(u32 iomode) 159static inline int lo_fail_bit(u32 iomode)
151{ 160{
@@ -169,6 +178,51 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss)
169 return nfss->pnfs_curr_ld != NULL; 178 return nfss->pnfs_curr_ld != NULL;
170} 179}
171 180
181static inline void
182pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
183{
184 if (lseg) {
185 struct pnfs_layoutdriver_type *ld;
186
187 ld = NFS_SERVER(req->wb_page->mapping->host)->pnfs_curr_ld;
188 if (ld->mark_pnfs_commit && ld->mark_pnfs_commit(lseg)) {
189 set_bit(PG_PNFS_COMMIT, &req->wb_flags);
190 req->wb_commit_lseg = get_lseg(lseg);
191 }
192 }
193}
194
195static inline int
196pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
197{
198 if (!test_and_clear_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags))
199 return PNFS_NOT_ATTEMPTED;
200 return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how);
201}
202
203static inline struct list_head *
204pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds)
205{
206 struct list_head *rv;
207
208 if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) {
209 struct inode *inode = req->wb_commit_lseg->pls_layout->plh_inode;
210
211 set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags);
212 rv = NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list(req);
213 /* matched by ref taken when PG_PNFS_COMMIT is set */
214 put_lseg(req->wb_commit_lseg);
215 } else
216 rv = mds;
217 return rv;
218}
219
220static inline void pnfs_clear_request_commit(struct nfs_page *req)
221{
222 if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags))
223 put_lseg(req->wb_commit_lseg);
224}
225
172#else /* CONFIG_NFS_V4_1 */ 226#else /* CONFIG_NFS_V4_1 */
173 227
174static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) 228static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
@@ -252,6 +306,31 @@ pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *ino)
252 pgio->pg_test = NULL; 306 pgio->pg_test = NULL;
253} 307}
254 308
309static inline void
310pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
311{
312}
313
314static inline int
315pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
316{
317 return PNFS_NOT_ATTEMPTED;
318}
319
320static inline struct list_head *
321pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds)
322{
323 return mds;
324}
325
326static inline void pnfs_clear_request_commit(struct nfs_page *req)
327{
328}
329
330static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
331{
332 return 0;
333}
255#endif /* CONFIG_NFS_V4_1 */ 334#endif /* CONFIG_NFS_V4_1 */
256 335
257#endif /* FS_NFS_PNFS_H */ 336#endif /* FS_NFS_PNFS_H */
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index b8ec170f2a0f..ac40b8535d7e 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -177,7 +177,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
177} 177}
178 178
179static int 179static int
180nfs_proc_lookup(struct inode *dir, struct qstr *name, 180nfs_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name,
181 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 181 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
182{ 182{
183 struct nfs_diropargs arg = { 183 struct nfs_diropargs arg = {
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 47a3ad63e0d5..e4cbc11a74ab 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -59,6 +59,7 @@ struct nfs_write_data *nfs_commitdata_alloc(void)
59 } 59 }
60 return p; 60 return p;
61} 61}
62EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);
62 63
63void nfs_commit_free(struct nfs_write_data *p) 64void nfs_commit_free(struct nfs_write_data *p)
64{ 65{
@@ -66,6 +67,7 @@ void nfs_commit_free(struct nfs_write_data *p)
66 kfree(p->pagevec); 67 kfree(p->pagevec);
67 mempool_free(p, nfs_commit_mempool); 68 mempool_free(p, nfs_commit_mempool);
68} 69}
70EXPORT_SYMBOL_GPL(nfs_commit_free);
69 71
70struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) 72struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
71{ 73{
@@ -179,8 +181,8 @@ static int wb_priority(struct writeback_control *wbc)
179 if (wbc->for_reclaim) 181 if (wbc->for_reclaim)
180 return FLUSH_HIGHPRI | FLUSH_STABLE; 182 return FLUSH_HIGHPRI | FLUSH_STABLE;
181 if (wbc->for_kupdate || wbc->for_background) 183 if (wbc->for_kupdate || wbc->for_background)
182 return FLUSH_LOWPRI; 184 return FLUSH_LOWPRI | FLUSH_COND_STABLE;
183 return 0; 185 return FLUSH_COND_STABLE;
184} 186}
185 187
186/* 188/*
@@ -387,11 +389,8 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
387 spin_lock(&inode->i_lock); 389 spin_lock(&inode->i_lock);
388 error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req); 390 error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
389 BUG_ON(error); 391 BUG_ON(error);
390 if (!nfsi->npages) { 392 if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE))
391 igrab(inode); 393 nfsi->change_attr++;
392 if (nfs_have_delegation(inode, FMODE_WRITE))
393 nfsi->change_attr++;
394 }
395 set_bit(PG_MAPPED, &req->wb_flags); 394 set_bit(PG_MAPPED, &req->wb_flags);
396 SetPagePrivate(req->wb_page); 395 SetPagePrivate(req->wb_page);
397 set_page_private(req->wb_page, (unsigned long)req); 396 set_page_private(req->wb_page, (unsigned long)req);
@@ -421,11 +420,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
421 clear_bit(PG_MAPPED, &req->wb_flags); 420 clear_bit(PG_MAPPED, &req->wb_flags);
422 radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); 421 radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
423 nfsi->npages--; 422 nfsi->npages--;
424 if (!nfsi->npages) { 423 spin_unlock(&inode->i_lock);
425 spin_unlock(&inode->i_lock);
426 iput(inode);
427 } else
428 spin_unlock(&inode->i_lock);
429 nfs_release_request(req); 424 nfs_release_request(req);
430} 425}
431 426
@@ -441,7 +436,7 @@ nfs_mark_request_dirty(struct nfs_page *req)
441 * Add a request to the inode's commit list. 436 * Add a request to the inode's commit list.
442 */ 437 */
443static void 438static void
444nfs_mark_request_commit(struct nfs_page *req) 439nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
445{ 440{
446 struct inode *inode = req->wb_context->path.dentry->d_inode; 441 struct inode *inode = req->wb_context->path.dentry->d_inode;
447 struct nfs_inode *nfsi = NFS_I(inode); 442 struct nfs_inode *nfsi = NFS_I(inode);
@@ -453,6 +448,7 @@ nfs_mark_request_commit(struct nfs_page *req)
453 NFS_PAGE_TAG_COMMIT); 448 NFS_PAGE_TAG_COMMIT);
454 nfsi->ncommit++; 449 nfsi->ncommit++;
455 spin_unlock(&inode->i_lock); 450 spin_unlock(&inode->i_lock);
451 pnfs_mark_request_commit(req, lseg);
456 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 452 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
457 inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); 453 inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
458 __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 454 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
@@ -474,14 +470,18 @@ nfs_clear_request_commit(struct nfs_page *req)
474static inline 470static inline
475int nfs_write_need_commit(struct nfs_write_data *data) 471int nfs_write_need_commit(struct nfs_write_data *data)
476{ 472{
477 return data->verf.committed != NFS_FILE_SYNC; 473 if (data->verf.committed == NFS_DATA_SYNC)
474 return data->lseg == NULL;
475 else
476 return data->verf.committed != NFS_FILE_SYNC;
478} 477}
479 478
480static inline 479static inline
481int nfs_reschedule_unstable_write(struct nfs_page *req) 480int nfs_reschedule_unstable_write(struct nfs_page *req,
481 struct nfs_write_data *data)
482{ 482{
483 if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) { 483 if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
484 nfs_mark_request_commit(req); 484 nfs_mark_request_commit(req, data->lseg);
485 return 1; 485 return 1;
486 } 486 }
487 if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) { 487 if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
@@ -492,7 +492,7 @@ int nfs_reschedule_unstable_write(struct nfs_page *req)
492} 492}
493#else 493#else
494static inline void 494static inline void
495nfs_mark_request_commit(struct nfs_page *req) 495nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
496{ 496{
497} 497}
498 498
@@ -509,7 +509,8 @@ int nfs_write_need_commit(struct nfs_write_data *data)
509} 509}
510 510
511static inline 511static inline
512int nfs_reschedule_unstable_write(struct nfs_page *req) 512int nfs_reschedule_unstable_write(struct nfs_page *req,
513 struct nfs_write_data *data)
513{ 514{
514 return 0; 515 return 0;
515} 516}
@@ -541,11 +542,15 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, u
541 if (!nfs_need_commit(nfsi)) 542 if (!nfs_need_commit(nfsi))
542 return 0; 543 return 0;
543 544
545 spin_lock(&inode->i_lock);
544 ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); 546 ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
545 if (ret > 0) 547 if (ret > 0)
546 nfsi->ncommit -= ret; 548 nfsi->ncommit -= ret;
549 spin_unlock(&inode->i_lock);
550
547 if (nfs_need_commit(NFS_I(inode))) 551 if (nfs_need_commit(NFS_I(inode)))
548 __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 552 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
553
549 return ret; 554 return ret;
550} 555}
551#else 556#else
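
[editor's note] nfs_scan_commit() now takes i_lock itself instead of requiring callers to hold it, which is why the explicit spin_lock/spin_unlock pair disappears from nfs_commit_inode() further down. A user-space rendering of the new locking rule, with a pthread mutex standing in for the inode spinlock (names invented):

#include <pthread.h>

static int scan_commit_model(pthread_mutex_t *i_lock, long *ncommit,
                             int (*scan_list)(void))
{
        int ret;

        pthread_mutex_lock(i_lock);     /* previously the caller's duty */
        ret = scan_list();              /* nfs_scan_list(..., TAG_COMMIT) */
        if (ret > 0)
                *ncommit -= ret;        /* counter adjusted under the lock */
        pthread_mutex_unlock(i_lock);
        return ret;
}
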
@@ -612,9 +617,11 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
612 } 617 }
613 618
614 if (nfs_clear_request_commit(req) && 619 if (nfs_clear_request_commit(req) &&
615 radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, 620 radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
616 req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) 621 req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) {
617 NFS_I(inode)->ncommit--; 622 NFS_I(inode)->ncommit--;
623 pnfs_clear_request_commit(req);
624 }
618 625
619 /* Okay, the request matches. Update the region */ 626 /* Okay, the request matches. Update the region */
620 if (offset < req->wb_offset) { 627 if (offset < req->wb_offset) {
@@ -762,11 +769,12 @@ int nfs_updatepage(struct file *file, struct page *page,
762 return status; 769 return status;
763} 770}
764 771
765static void nfs_writepage_release(struct nfs_page *req) 772static void nfs_writepage_release(struct nfs_page *req,
773 struct nfs_write_data *data)
766{ 774{
767 struct page *page = req->wb_page; 775 struct page *page = req->wb_page;
768 776
769 if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) 777 if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data))
770 nfs_inode_remove_request(req); 778 nfs_inode_remove_request(req);
771 nfs_clear_page_tag_locked(req); 779 nfs_clear_page_tag_locked(req);
772 nfs_end_page_writeback(page); 780 nfs_end_page_writeback(page);
@@ -863,7 +871,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
863 data->args.context = get_nfs_open_context(req->wb_context); 871 data->args.context = get_nfs_open_context(req->wb_context);
864 data->args.lock_context = req->wb_lock_context; 872 data->args.lock_context = req->wb_lock_context;
865 data->args.stable = NFS_UNSTABLE; 873 data->args.stable = NFS_UNSTABLE;
866 if (how & FLUSH_STABLE) { 874 if (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
867 data->args.stable = NFS_DATA_SYNC; 875 data->args.stable = NFS_DATA_SYNC;
868 if (!nfs_need_commit(NFS_I(inode))) 876 if (!nfs_need_commit(NFS_I(inode)))
869 data->args.stable = NFS_FILE_SYNC; 877 data->args.stable = NFS_FILE_SYNC;
@@ -912,6 +920,12 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
912 920
913 nfs_list_remove_request(req); 921 nfs_list_remove_request(req);
914 922
923 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
924 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit ||
925 desc->pg_count > wsize))
926 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
927
928
915 nbytes = desc->pg_count; 929 nbytes = desc->pg_count;
916 do { 930 do {
917 size_t len = min(nbytes, wsize); 931 size_t len = min(nbytes, wsize);
@@ -1002,6 +1016,10 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
1002 if ((!lseg) && list_is_singular(&data->pages)) 1016 if ((!lseg) && list_is_singular(&data->pages))
1003 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW); 1017 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW);
1004 1018
1019 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
1020 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
1021 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
1022
1005 /* Set up the argument struct */ 1023 /* Set up the argument struct */
1006 ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags); 1024 ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags);
1007out: 1025out:
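
[editor's note] Both coalescing paths apply the same rule for the new FLUSH_COND_STABLE flag: a conditionally-stable write is demoted back to an unstable write (plus a later COMMIT) whenever it cannot be the lone I/O -- more requests are queued, commits are already pending, or, in the multi-page path only, the request spans more than one wsize RPC. If the flag survives, the earlier nfs_write_rpcsetup() hunk upgrades the write to DATA_SYNC or FILE_SYNC. The demotion as a condensed predicate, with illustrative flag values:

#define FLUSH_STABLE      0x04          /* illustrative, not the kernel's */
#define FLUSH_COND_STABLE 0x40

static int effective_ioflags(int ioflags, int moreio, long ncommit,
                             unsigned long count, unsigned long wsize)
{
        if ((ioflags & FLUSH_COND_STABLE) &&
            (moreio || ncommit || count > wsize))
                ioflags &= ~FLUSH_COND_STABLE;  /* back to UNSTABLE+COMMIT */
        return ioflags;
}
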
@@ -1074,7 +1092,7 @@ static void nfs_writeback_release_partial(void *calldata)
1074 1092
1075out: 1093out:
1076 if (atomic_dec_and_test(&req->wb_complete)) 1094 if (atomic_dec_and_test(&req->wb_complete))
1077 nfs_writepage_release(req); 1095 nfs_writepage_release(req, data);
1078 nfs_writedata_release(calldata); 1096 nfs_writedata_release(calldata);
1079} 1097}
1080 1098
@@ -1141,7 +1159,7 @@ static void nfs_writeback_release_full(void *calldata)
1141 1159
1142 if (nfs_write_need_commit(data)) { 1160 if (nfs_write_need_commit(data)) {
1143 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); 1161 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
1144 nfs_mark_request_commit(req); 1162 nfs_mark_request_commit(req, data->lseg);
1145 dprintk(" marked for commit\n"); 1163 dprintk(" marked for commit\n");
1146 goto next; 1164 goto next;
1147 } 1165 }
@@ -1251,57 +1269,82 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1251#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 1269#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1252static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) 1270static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
1253{ 1271{
1272 int ret;
1273
1254 if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags)) 1274 if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags))
1255 return 1; 1275 return 1;
1256 if (may_wait && !out_of_line_wait_on_bit_lock(&nfsi->flags, 1276 if (!may_wait)
1257 NFS_INO_COMMIT, nfs_wait_bit_killable, 1277 return 0;
1258 TASK_KILLABLE)) 1278 ret = out_of_line_wait_on_bit_lock(&nfsi->flags,
1259 return 1; 1279 NFS_INO_COMMIT,
1260 return 0; 1280 nfs_wait_bit_killable,
1281 TASK_KILLABLE);
1282 return (ret < 0) ? ret : 1;
1261} 1283}
1262 1284
1263static void nfs_commit_clear_lock(struct nfs_inode *nfsi) 1285void nfs_commit_clear_lock(struct nfs_inode *nfsi)
1264{ 1286{
1265 clear_bit(NFS_INO_COMMIT, &nfsi->flags); 1287 clear_bit(NFS_INO_COMMIT, &nfsi->flags);
1266 smp_mb__after_clear_bit(); 1288 smp_mb__after_clear_bit();
1267 wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); 1289 wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
1268} 1290}
1291EXPORT_SYMBOL_GPL(nfs_commit_clear_lock);
1269 1292
1270 1293void nfs_commitdata_release(void *data)
1271static void nfs_commitdata_release(void *data)
1272{ 1294{
1273 struct nfs_write_data *wdata = data; 1295 struct nfs_write_data *wdata = data;
1274 1296
1297 put_lseg(wdata->lseg);
1275 put_nfs_open_context(wdata->args.context); 1298 put_nfs_open_context(wdata->args.context);
1276 nfs_commit_free(wdata); 1299 nfs_commit_free(wdata);
1277} 1300}
1301EXPORT_SYMBOL_GPL(nfs_commitdata_release);
1278 1302
1279/* 1303int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt,
1280 * Set up the argument/result storage required for the RPC call. 1304 const struct rpc_call_ops *call_ops,
1281 */ 1305 int how)
1282static int nfs_commit_rpcsetup(struct list_head *head,
1283 struct nfs_write_data *data,
1284 int how)
1285{ 1306{
1286 struct nfs_page *first = nfs_list_entry(head->next);
1287 struct inode *inode = first->wb_context->path.dentry->d_inode;
1288 int priority = flush_task_priority(how);
1289 struct rpc_task *task; 1307 struct rpc_task *task;
1308 int priority = flush_task_priority(how);
1290 struct rpc_message msg = { 1309 struct rpc_message msg = {
1291 .rpc_argp = &data->args, 1310 .rpc_argp = &data->args,
1292 .rpc_resp = &data->res, 1311 .rpc_resp = &data->res,
1293 .rpc_cred = first->wb_context->cred, 1312 .rpc_cred = data->cred,
1294 }; 1313 };
1295 struct rpc_task_setup task_setup_data = { 1314 struct rpc_task_setup task_setup_data = {
1296 .task = &data->task, 1315 .task = &data->task,
1297 .rpc_client = NFS_CLIENT(inode), 1316 .rpc_client = clnt,
1298 .rpc_message = &msg, 1317 .rpc_message = &msg,
1299 .callback_ops = &nfs_commit_ops, 1318 .callback_ops = call_ops,
1300 .callback_data = data, 1319 .callback_data = data,
1301 .workqueue = nfsiod_workqueue, 1320 .workqueue = nfsiod_workqueue,
1302 .flags = RPC_TASK_ASYNC, 1321 .flags = RPC_TASK_ASYNC,
1303 .priority = priority, 1322 .priority = priority,
1304 }; 1323 };
1324 /* Set up the initial task struct. */
1325 NFS_PROTO(data->inode)->commit_setup(data, &msg);
1326
1327 dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
1328
1329 task = rpc_run_task(&task_setup_data);
1330 if (IS_ERR(task))
1331 return PTR_ERR(task);
1332 if (how & FLUSH_SYNC)
1333 rpc_wait_for_completion_task(task);
1334 rpc_put_task(task);
1335 return 0;
1336}
1337EXPORT_SYMBOL_GPL(nfs_initiate_commit);
1338
1339/*
1340 * Set up the argument/result storage required for the RPC call.
1341 */
1342void nfs_init_commit(struct nfs_write_data *data,
1343 struct list_head *head,
1344 struct pnfs_layout_segment *lseg)
1345{
1346 struct nfs_page *first = nfs_list_entry(head->next);
1347 struct inode *inode = first->wb_context->path.dentry->d_inode;
1305 1348
1306 /* Set up the RPC argument and reply structs 1349 /* Set up the RPC argument and reply structs
1307 * NB: take care not to mess about with data->commit et al. */ 1350 * NB: take care not to mess about with data->commit et al. */
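
[editor's note] Two things happen in this hunk: nfs_commit_set_lock() can now return a negative error when its killable wait is interrupted (callers below check for <= 0), and the old nfs_commit_rpcsetup() is split into nfs_init_commit() -- argument/result setup, taking over the lseg reference -- and nfs_initiate_commit() -- RPC dispatch -- both exported so a pNFS layout driver can drive commits through its own call_ops. A compilable model of the new locking contract, with a C11 atomic flag standing in for NFS_INO_COMMIT:

#include <stdatomic.h>

/* returns 1 once the commit lock is held, 0 if busy and not waiting,
 * or a negative error if the killable wait was interrupted */
static int commit_set_lock_model(atomic_flag *commit, int may_wait,
                                 int (*wait_on_bit_killable)(atomic_flag *))
{
        int ret;

        if (!atomic_flag_test_and_set(commit))
                return 1;
        if (!may_wait)
                return 0;
        ret = wait_on_bit_killable(commit); /* 0 once acquired, <0 if killed */
        return ret < 0 ? ret : 1;
}
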
@@ -1309,7 +1352,9 @@ static int nfs_commit_rpcsetup(struct list_head *head,
1309 list_splice_init(head, &data->pages); 1352 list_splice_init(head, &data->pages);
1310 1353
1311 data->inode = inode; 1354 data->inode = inode;
1312 data->cred = msg.rpc_cred; 1355 data->cred = first->wb_context->cred;
1356 data->lseg = lseg; /* reference transferred */
1357 data->mds_ops = &nfs_commit_ops;
1313 1358
1314 data->args.fh = NFS_FH(data->inode); 1359 data->args.fh = NFS_FH(data->inode);
1315 /* Note: we always request a commit of the entire inode */ 1360 /* Note: we always request a commit of the entire inode */
@@ -1320,20 +1365,25 @@ static int nfs_commit_rpcsetup(struct list_head *head,
1320 data->res.fattr = &data->fattr; 1365 data->res.fattr = &data->fattr;
1321 data->res.verf = &data->verf; 1366 data->res.verf = &data->verf;
1322 nfs_fattr_init(&data->fattr); 1367 nfs_fattr_init(&data->fattr);
1368}
1369EXPORT_SYMBOL_GPL(nfs_init_commit);
1323 1370
1324 /* Set up the initial task struct. */ 1371void nfs_retry_commit(struct list_head *page_list,
1325 NFS_PROTO(inode)->commit_setup(data, &msg); 1372 struct pnfs_layout_segment *lseg)
1326 1373{
1327 dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); 1374 struct nfs_page *req;
1328 1375
1329 task = rpc_run_task(&task_setup_data); 1376 while (!list_empty(page_list)) {
1330 if (IS_ERR(task)) 1377 req = nfs_list_entry(page_list->next);
1331 return PTR_ERR(task); 1378 nfs_list_remove_request(req);
1332 if (how & FLUSH_SYNC) 1379 nfs_mark_request_commit(req, lseg);
1333 rpc_wait_for_completion_task(task); 1380 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1334 rpc_put_task(task); 1381 dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
1335 return 0; 1382 BDI_RECLAIMABLE);
1383 nfs_clear_page_tag_locked(req);
1384 }
1336} 1385}
1386EXPORT_SYMBOL_GPL(nfs_retry_commit);
1337 1387
1338/* 1388/*
1339 * Commit dirty pages 1389 * Commit dirty pages
@@ -1342,7 +1392,6 @@ static int
1342nfs_commit_list(struct inode *inode, struct list_head *head, int how) 1392nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1343{ 1393{
1344 struct nfs_write_data *data; 1394 struct nfs_write_data *data;
1345 struct nfs_page *req;
1346 1395
1347 data = nfs_commitdata_alloc(); 1396 data = nfs_commitdata_alloc();
1348 1397
@@ -1350,17 +1399,10 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1350 goto out_bad; 1399 goto out_bad;
1351 1400
1352 /* Set up the argument struct */ 1401 /* Set up the argument struct */
1353 return nfs_commit_rpcsetup(head, data, how); 1402 nfs_init_commit(data, head, NULL);
1403 return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how);
1354 out_bad: 1404 out_bad:
1355 while (!list_empty(head)) { 1405 nfs_retry_commit(head, NULL);
1356 req = nfs_list_entry(head->next);
1357 nfs_list_remove_request(req);
1358 nfs_mark_request_commit(req);
1359 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1360 dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
1361 BDI_RECLAIMABLE);
1362 nfs_clear_page_tag_locked(req);
1363 }
1364 nfs_commit_clear_lock(NFS_I(inode)); 1406 nfs_commit_clear_lock(NFS_I(inode));
1365 return -ENOMEM; 1407 return -ENOMEM;
1366} 1408}
@@ -1380,10 +1422,9 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1380 return; 1422 return;
1381} 1423}
1382 1424
1383static void nfs_commit_release(void *calldata) 1425void nfs_commit_release_pages(struct nfs_write_data *data)
1384{ 1426{
1385 struct nfs_write_data *data = calldata; 1427 struct nfs_page *req;
1386 struct nfs_page *req;
1387 int status = data->task.tk_status; 1428 int status = data->task.tk_status;
1388 1429
1389 while (!list_empty(&data->pages)) { 1430 while (!list_empty(&data->pages)) {
@@ -1417,6 +1458,14 @@ static void nfs_commit_release(void *calldata)
1417 next: 1458 next:
1418 nfs_clear_page_tag_locked(req); 1459 nfs_clear_page_tag_locked(req);
1419 } 1460 }
1461}
1462EXPORT_SYMBOL_GPL(nfs_commit_release_pages);
1463
1464static void nfs_commit_release(void *calldata)
1465{
1466 struct nfs_write_data *data = calldata;
1467
1468 nfs_commit_release_pages(data);
1420 nfs_commit_clear_lock(NFS_I(data->inode)); 1469 nfs_commit_clear_lock(NFS_I(data->inode));
1421 nfs_commitdata_release(calldata); 1470 nfs_commitdata_release(calldata);
1422} 1471}
@@ -1433,23 +1482,28 @@ int nfs_commit_inode(struct inode *inode, int how)
1433{ 1482{
1434 LIST_HEAD(head); 1483 LIST_HEAD(head);
1435 int may_wait = how & FLUSH_SYNC; 1484 int may_wait = how & FLUSH_SYNC;
1436 int res = 0; 1485 int res;
1437 1486
1438 if (!nfs_commit_set_lock(NFS_I(inode), may_wait)) 1487 res = nfs_commit_set_lock(NFS_I(inode), may_wait);
1488 if (res <= 0)
1439 goto out_mark_dirty; 1489 goto out_mark_dirty;
1440 spin_lock(&inode->i_lock);
1441 res = nfs_scan_commit(inode, &head, 0, 0); 1490 res = nfs_scan_commit(inode, &head, 0, 0);
1442 spin_unlock(&inode->i_lock);
1443 if (res) { 1491 if (res) {
1444 int error = nfs_commit_list(inode, &head, how); 1492 int error;
1493
1494 error = pnfs_commit_list(inode, &head, how);
1495 if (error == PNFS_NOT_ATTEMPTED)
1496 error = nfs_commit_list(inode, &head, how);
1445 if (error < 0) 1497 if (error < 0)
1446 return error; 1498 return error;
1447 if (may_wait) 1499 if (!may_wait)
1448 wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT,
1449 nfs_wait_bit_killable,
1450 TASK_KILLABLE);
1451 else
1452 goto out_mark_dirty; 1500 goto out_mark_dirty;
1501 error = wait_on_bit(&NFS_I(inode)->flags,
1502 NFS_INO_COMMIT,
1503 nfs_wait_bit_killable,
1504 TASK_KILLABLE);
1505 if (error < 0)
1506 return error;
1453 } else 1507 } else
1454 nfs_commit_clear_lock(NFS_I(inode)); 1508 nfs_commit_clear_lock(NFS_I(inode));
1455 return res; 1509 return res;
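
[editor's note] The rewritten nfs_commit_inode() folds the pieces together: the lock helper's new error return is propagated, pnfs_commit_list() gets first refusal with PNFS_NOT_ATTEMPTED falling back to the MDS path, and a killed wait now surfaces as an error instead of being swallowed. A compilable condensation -- every helper is an invented stub whose return value mimics the diff:

enum { PNFS_NOT_ATTEMPTED = 1 };

static int commit_set_lock(int may_wait) { (void)may_wait; return 1; }
static int scan_commit(void)             { return 1; }  /* pages found */
static int pnfs_commit(int how)  { (void)how; return PNFS_NOT_ATTEMPTED; }
static int mds_commit(int how)   { (void)how; return 0; }
static int wait_commit_killable(void)    { return 0; }
static void commit_clear_lock(void)      { }
static void mark_inode_dirty(void)       { }

static int commit_inode_model(int how, int may_wait)
{
        int res = commit_set_lock(may_wait);    /* 1, 0, or negative error */

        if (res <= 0)
                goto out_mark_dirty;
        res = scan_commit();
        if (res) {
                int error = pnfs_commit(how);      /* layout driver first */
                if (error == PNFS_NOT_ATTEMPTED)
                        error = mds_commit(how);   /* regular COMMIT RPC */
                if (error < 0)
                        return error;
                if (!may_wait)
                        goto out_mark_dirty;
                error = wait_commit_killable();
                if (error < 0)
                        return error;
                return res;
        }
        commit_clear_lock();
        return res;
out_mark_dirty:
        mark_inode_dirty();
        return res;
}
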
@@ -1503,7 +1557,22 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
1503 1557
1504int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) 1558int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
1505{ 1559{
1506 return nfs_commit_unstable_pages(inode, wbc); 1560 int ret;
1561
1562 ret = nfs_commit_unstable_pages(inode, wbc);
1563 if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) {
1564 int status;
1565 bool sync = true;
1566
1567 if (wbc->sync_mode == WB_SYNC_NONE || wbc->nonblocking ||
1568 wbc->for_background)
1569 sync = false;
1570
1571 status = pnfs_layoutcommit_inode(inode, sync);
1572 if (status < 0)
1573 return status;
1574 }
1575 return ret;
1507} 1576}
1508 1577
1509/* 1578/*
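
[editor's note] nfs_write_inode() now tails every successful commit pass with a LAYOUTCOMMIT when the inode has pNFS state to report, and may only block for it on a deliberate synchronous flush. The sync decision as a standalone predicate, with struct fields mirroring writeback_control:

enum writeback_sync_modes { WB_SYNC_NONE = 0, WB_SYNC_ALL = 1 };

struct wbc_model {
        enum writeback_sync_modes sync_mode;
        unsigned int nonblocking : 1;
        unsigned int for_background : 1;
};

static int layoutcommit_may_sync(const struct wbc_model *wbc)
{
        /* background/nonblocking writeback must not wait on the MDS */
        return !(wbc->sync_mode == WB_SYNC_NONE || wbc->nonblocking ||
                 wbc->for_background);
}
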
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c
index 84c27d69d421..6940439bd609 100644
--- a/fs/nfs_common/nfsacl.c
+++ b/fs/nfs_common/nfsacl.c
@@ -117,7 +117,6 @@ int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
117 * invoked in contexts where a memory allocation failure is 117 * invoked in contexts where a memory allocation failure is
118 * fatal. Fortunately this fake ACL is small enough to 118 * fatal. Fortunately this fake ACL is small enough to
119 * construct on the stack. */ 119 * construct on the stack. */
120 memset(acl2, 0, sizeof(acl2));
121 posix_acl_init(acl2, 4); 120 posix_acl_init(acl2, 4);
122 121
123 /* Insert entries in canonical order: other orders seem 122 /* Insert entries in canonical order: other orders seem
@@ -174,7 +173,7 @@ xdr_nfsace_decode(struct xdr_array2_desc *desc, void *elem)
174 return -EINVAL; 173 return -EINVAL;
175 break; 174 break;
176 case ACL_MASK: 175 case ACL_MASK:
177 /* Solaris sometimes sets additonal bits in the mask */ 176 /* Solaris sometimes sets additional bits in the mask */
178 entry->e_perm &= S_IRWXO; 177 entry->e_perm &= S_IRWXO;
179 break; 178 break;
180 default: 179 default:
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 8b31e5f8795d..ad000aeb21a2 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -299,7 +299,6 @@ svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old)
299 299
300#define EXPORT_HASHBITS 8 300#define EXPORT_HASHBITS 8
301#define EXPORT_HASHMAX (1<< EXPORT_HASHBITS) 301#define EXPORT_HASHMAX (1<< EXPORT_HASHBITS)
302#define EXPORT_HASHMASK (EXPORT_HASHMAX -1)
303 302
304static struct cache_head *export_table[EXPORT_HASHMAX]; 303static struct cache_head *export_table[EXPORT_HASHMAX];
305 304
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 0c6d81670137..7c831a2731fa 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -38,7 +38,6 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
38 exp_readlock(); 38 exp_readlock();
39 nfserr = nfsd_open(rqstp, &fh, S_IFREG, NFSD_MAY_LOCK, filp); 39 nfserr = nfsd_open(rqstp, &fh, S_IFREG, NFSD_MAY_LOCK, filp);
40 fh_put(&fh); 40 fh_put(&fh);
41 rqstp->rq_client = NULL;
42 exp_readunlock(); 41 exp_readunlock();
43 /* We return nlm error codes as nlm doesn't know 42 /* We return nlm error codes as nlm doesn't know
44 * about nfsd, but nfsd does know about nlm.. 43 * about nfsd, but nfsd does know about nlm..
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 7e84a852cdae..ad48faca20fc 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -702,7 +702,7 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p,
702 *p++ = htonl(resp->eof); 702 *p++ = htonl(resp->eof);
703 *p++ = htonl(resp->count); /* xdr opaque count */ 703 *p++ = htonl(resp->count); /* xdr opaque count */
704 xdr_ressize_check(rqstp, p); 704 xdr_ressize_check(rqstp, p);
705 /* now update rqstp->rq_res to reflect data aswell */ 705 /* now update rqstp->rq_res to reflect data as well */
706 rqstp->rq_res.page_len = resp->count; 706 rqstp->rq_res.page_len = resp->count;
707 if (resp->count & 3) { 707 if (resp->count & 3) {
708 /* need to pad the tail */ 708 /* need to pad the tail */
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 6d2c397d458b..55780a22fdbd 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -63,7 +63,6 @@ struct ent {
63 63
64#define ENT_HASHBITS 8 64#define ENT_HASHBITS 8
65#define ENT_HASHMAX (1 << ENT_HASHBITS) 65#define ENT_HASHMAX (1 << ENT_HASHBITS)
66#define ENT_HASHMASK (ENT_HASHMAX - 1)
67 66
68static void 67static void
69ent_init(struct cache_head *cnew, struct cache_head *citm) 68ent_init(struct cache_head *cnew, struct cache_head *citm)
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index db52546143d1..5fcb1396a7e3 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -984,8 +984,8 @@ typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
984 void *); 984 void *);
985enum nfsd4_op_flags { 985enum nfsd4_op_flags {
986 ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ 986 ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */
987 ALLOWED_ON_ABSENT_FS = 2 << 0, /* ops processed on absent fs */ 987 ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */
988 ALLOWED_AS_FIRST_OP = 3 << 0, /* ops reqired first in compound */ 988 ALLOWED_AS_FIRST_OP = 1 << 2, /* ops reqired first in compound */
989}; 989};
990 990
991struct nfsd4_operation { 991struct nfsd4_operation {
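
[editor's note] This is a straight bit-assignment bug fix: 2 << 0 and 3 << 0 are simply the values 2 and 3, and since 3 == 1 | 2, a test of (op_flags & ALLOWED_AS_FIRST_OP) also fired for any op carrying one of the other two flags. Each flag now owns a distinct bit:

enum nfsd4_op_flags_fixed {
        ALLOWED_WITHOUT_FH   = 1 << 0,  /* 0x1 */
        ALLOWED_ON_ABSENT_FS = 1 << 1,  /* 0x2 (old 2 << 0 worked by luck) */
        ALLOWED_AS_FIRST_OP  = 1 << 2,  /* 0x4 (old 3 << 0 was 0x1 | 0x2) */
};
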
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 7b566ec14e18..4cf04e11c66c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -148,7 +148,7 @@ static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE];
148/* hash table for nfs4_file */ 148/* hash table for nfs4_file */
149#define FILE_HASH_BITS 8 149#define FILE_HASH_BITS 8
150#define FILE_HASH_SIZE (1 << FILE_HASH_BITS) 150#define FILE_HASH_SIZE (1 << FILE_HASH_BITS)
151#define FILE_HASH_MASK (FILE_HASH_SIZE - 1) 151
152/* hash table for (open)nfs4_stateid */ 152/* hash table for (open)nfs4_stateid */
153#define STATEID_HASH_BITS 10 153#define STATEID_HASH_BITS 10
154#define STATEID_HASH_SIZE (1 << STATEID_HASH_BITS) 154#define STATEID_HASH_SIZE (1 << STATEID_HASH_BITS)
@@ -258,6 +258,7 @@ static void nfs4_put_deleg_lease(struct nfs4_file *fp)
258 if (atomic_dec_and_test(&fp->fi_delegees)) { 258 if (atomic_dec_and_test(&fp->fi_delegees)) {
259 vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease); 259 vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease);
260 fp->fi_lease = NULL; 260 fp->fi_lease = NULL;
261 fput(fp->fi_deleg_file);
261 fp->fi_deleg_file = NULL; 262 fp->fi_deleg_file = NULL;
262 } 263 }
263} 264}
@@ -316,64 +317,6 @@ static struct list_head unconf_id_hashtbl[CLIENT_HASH_SIZE];
316static struct list_head client_lru; 317static struct list_head client_lru;
317static struct list_head close_lru; 318static struct list_head close_lru;
318 319
319static void unhash_generic_stateid(struct nfs4_stateid *stp)
320{
321 list_del(&stp->st_hash);
322 list_del(&stp->st_perfile);
323 list_del(&stp->st_perstateowner);
324}
325
326static void free_generic_stateid(struct nfs4_stateid *stp)
327{
328 put_nfs4_file(stp->st_file);
329 kmem_cache_free(stateid_slab, stp);
330}
331
332static void release_lock_stateid(struct nfs4_stateid *stp)
333{
334 struct file *file;
335
336 unhash_generic_stateid(stp);
337 file = find_any_file(stp->st_file);
338 if (file)
339 locks_remove_posix(file, (fl_owner_t)stp->st_stateowner);
340 free_generic_stateid(stp);
341}
342
343static void unhash_lockowner(struct nfs4_stateowner *sop)
344{
345 struct nfs4_stateid *stp;
346
347 list_del(&sop->so_idhash);
348 list_del(&sop->so_strhash);
349 list_del(&sop->so_perstateid);
350 while (!list_empty(&sop->so_stateids)) {
351 stp = list_first_entry(&sop->so_stateids,
352 struct nfs4_stateid, st_perstateowner);
353 release_lock_stateid(stp);
354 }
355}
356
357static void release_lockowner(struct nfs4_stateowner *sop)
358{
359 unhash_lockowner(sop);
360 nfs4_put_stateowner(sop);
361}
362
363static void
364release_stateid_lockowners(struct nfs4_stateid *open_stp)
365{
366 struct nfs4_stateowner *lock_sop;
367
368 while (!list_empty(&open_stp->st_lockowners)) {
369 lock_sop = list_entry(open_stp->st_lockowners.next,
370 struct nfs4_stateowner, so_perstateid);
371 /* list_del(&open_stp->st_lockowners); */
372 BUG_ON(lock_sop->so_is_open_owner);
373 release_lockowner(lock_sop);
374 }
375}
376
377/* 320/*
378 * We store the NONE, READ, WRITE, and BOTH bits separately in the 321 * We store the NONE, READ, WRITE, and BOTH bits separately in the
379 * st_{access,deny}_bmap field of the stateid, in order to track not 322 * st_{access,deny}_bmap field of the stateid, in order to track not
@@ -446,13 +389,74 @@ static int nfs4_access_bmap_to_omode(struct nfs4_stateid *stp)
446 return nfs4_access_to_omode(access); 389 return nfs4_access_to_omode(access);
447} 390}
448 391
449static void release_open_stateid(struct nfs4_stateid *stp) 392static void unhash_generic_stateid(struct nfs4_stateid *stp)
450{ 393{
451 int oflag = nfs4_access_bmap_to_omode(stp); 394 list_del(&stp->st_hash);
395 list_del(&stp->st_perfile);
396 list_del(&stp->st_perstateowner);
397}
452 398
399static void free_generic_stateid(struct nfs4_stateid *stp)
400{
401 int oflag;
402
403 if (stp->st_access_bmap) {
404 oflag = nfs4_access_bmap_to_omode(stp);
405 nfs4_file_put_access(stp->st_file, oflag);
406 }
407 put_nfs4_file(stp->st_file);
408 kmem_cache_free(stateid_slab, stp);
409}
410
411static void release_lock_stateid(struct nfs4_stateid *stp)
412{
413 struct file *file;
414
415 unhash_generic_stateid(stp);
416 file = find_any_file(stp->st_file);
417 if (file)
418 locks_remove_posix(file, (fl_owner_t)stp->st_stateowner);
419 free_generic_stateid(stp);
420}
421
422static void unhash_lockowner(struct nfs4_stateowner *sop)
423{
424 struct nfs4_stateid *stp;
425
426 list_del(&sop->so_idhash);
427 list_del(&sop->so_strhash);
428 list_del(&sop->so_perstateid);
429 while (!list_empty(&sop->so_stateids)) {
430 stp = list_first_entry(&sop->so_stateids,
431 struct nfs4_stateid, st_perstateowner);
432 release_lock_stateid(stp);
433 }
434}
435
436static void release_lockowner(struct nfs4_stateowner *sop)
437{
438 unhash_lockowner(sop);
439 nfs4_put_stateowner(sop);
440}
441
442static void
443release_stateid_lockowners(struct nfs4_stateid *open_stp)
444{
445 struct nfs4_stateowner *lock_sop;
446
447 while (!list_empty(&open_stp->st_lockowners)) {
448 lock_sop = list_entry(open_stp->st_lockowners.next,
449 struct nfs4_stateowner, so_perstateid);
450 /* list_del(&open_stp->st_lockowners); */
451 BUG_ON(lock_sop->so_is_open_owner);
452 release_lockowner(lock_sop);
453 }
454}
455
456static void release_open_stateid(struct nfs4_stateid *stp)
457{
453 unhash_generic_stateid(stp); 458 unhash_generic_stateid(stp);
454 release_stateid_lockowners(stp); 459 release_stateid_lockowners(stp);
455 nfs4_file_put_access(stp->st_file, oflag);
456 free_generic_stateid(stp); 460 free_generic_stateid(stp);
457} 461}
458 462
@@ -608,7 +612,8 @@ static void init_forechannel_attrs(struct nfsd4_channel_attrs *new, struct nfsd4
608 u32 maxrpc = nfsd_serv->sv_max_mesg; 612 u32 maxrpc = nfsd_serv->sv_max_mesg;
609 613
610 new->maxreqs = numslots; 614 new->maxreqs = numslots;
611 new->maxresp_cached = slotsize + NFSD_MIN_HDR_SEQ_SZ; 615 new->maxresp_cached = min_t(u32, req->maxresp_cached,
616 slotsize + NFSD_MIN_HDR_SEQ_SZ);
612 new->maxreq_sz = min_t(u32, req->maxreq_sz, maxrpc); 617 new->maxreq_sz = min_t(u32, req->maxreq_sz, maxrpc);
613 new->maxresp_sz = min_t(u32, req->maxresp_sz, maxrpc); 618 new->maxresp_sz = min_t(u32, req->maxresp_sz, maxrpc);
614 new->maxops = min_t(u32, req->maxops, NFSD_MAX_OPS_PER_COMPOUND); 619 new->maxops = min_t(u32, req->maxops, NFSD_MAX_OPS_PER_COMPOUND);
@@ -3054,7 +3059,7 @@ check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags)
3054 if (ONE_STATEID(stateid) && (flags & RD_STATE)) 3059 if (ONE_STATEID(stateid) && (flags & RD_STATE))
3055 return nfs_ok; 3060 return nfs_ok;
3056 else if (locks_in_grace()) { 3061 else if (locks_in_grace()) {
3057 /* Answer in remaining cases depends on existance of 3062 /* Answer in remaining cases depends on existence of
3058 * conflicting state; so we must wait out the grace period. */ 3063 * conflicting state; so we must wait out the grace period. */
3059 return nfserr_grace; 3064 return nfserr_grace;
3060 } else if (flags & WR_STATE) 3065 } else if (flags & WR_STATE)
@@ -3674,7 +3679,7 @@ find_lockstateowner_str(struct inode *inode, clientid_t *clid,
3674/* 3679/*
3675 * Alloc a lock owner structure. 3680 * Alloc a lock owner structure.
3676 * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has 3681 * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has
3677 * occured. 3682 * occurred.
3678 * 3683 *
3679 * strhashval = lock_ownerstr_hashval 3684 * strhashval = lock_ownerstr_hashval
3680 */ 3685 */
@@ -3735,6 +3740,7 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc
3735 stp->st_stateid.si_stateownerid = sop->so_id; 3740 stp->st_stateid.si_stateownerid = sop->so_id;
3736 stp->st_stateid.si_fileid = fp->fi_id; 3741 stp->st_stateid.si_fileid = fp->fi_id;
3737 stp->st_stateid.si_generation = 0; 3742 stp->st_stateid.si_generation = 0;
3743 stp->st_access_bmap = 0;
3738 stp->st_deny_bmap = open_stp->st_deny_bmap; 3744 stp->st_deny_bmap = open_stp->st_deny_bmap;
3739 stp->st_openstp = open_stp; 3745 stp->st_openstp = open_stp;
3740 3746
@@ -3749,6 +3755,17 @@ check_lock_length(u64 offset, u64 length)
3749 LOFF_OVERFLOW(offset, length))); 3755 LOFF_OVERFLOW(offset, length)));
3750} 3756}
3751 3757
3758static void get_lock_access(struct nfs4_stateid *lock_stp, u32 access)
3759{
3760 struct nfs4_file *fp = lock_stp->st_file;
3761 int oflag = nfs4_access_to_omode(access);
3762
3763 if (test_bit(access, &lock_stp->st_access_bmap))
3764 return;
3765 nfs4_file_get_access(fp, oflag);
3766 __set_bit(access, &lock_stp->st_access_bmap);
3767}
3768
3752/* 3769/*
3753 * LOCK operation 3770 * LOCK operation
3754 */ 3771 */
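
[editor's note] get_lock_access() is the counterpart: the old LOCK code called nfs4_get_vfs_file() on every lock, stacking up opens that nothing released. The helper now takes a file-access reference at most once per share-access mode and records the mode in st_access_bmap, so free_generic_stateid() above puts exactly what was taken. A simplified, compilable model -- the bit positions and the omode mapping are invented:

#define SHARE_ACCESS_READ  1
#define SHARE_ACCESS_WRITE 2

struct lock_stateid_model {
        unsigned long st_access_bmap;
        int omode_refs[2];              /* O_RDONLY, O_WRONLY slots */
};

static void get_lock_access_model(struct lock_stateid_model *stp,
                                  unsigned int access)
{
        int omode = (access == SHARE_ACCESS_WRITE); /* nfs4_access_to_omode() */

        if (stp->st_access_bmap & (1UL << access))
                return;                 /* this mode already held: idempotent */
        stp->omode_refs[omode]++;       /* nfs4_file_get_access(fp, oflag) */
        stp->st_access_bmap |= 1UL << access;
}
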
@@ -3765,7 +3782,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3765 struct file_lock conflock; 3782 struct file_lock conflock;
3766 __be32 status = 0; 3783 __be32 status = 0;
3767 unsigned int strhashval; 3784 unsigned int strhashval;
3768 unsigned int cmd;
3769 int err; 3785 int err;
3770 3786
3771 dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n", 3787 dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n",
@@ -3847,22 +3863,18 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3847 switch (lock->lk_type) { 3863 switch (lock->lk_type) {
3848 case NFS4_READ_LT: 3864 case NFS4_READ_LT:
3849 case NFS4_READW_LT: 3865 case NFS4_READW_LT:
3850 if (find_readable_file(lock_stp->st_file)) { 3866 filp = find_readable_file(lock_stp->st_file);
3851 nfs4_get_vfs_file(rqstp, fp, &cstate->current_fh, NFS4_SHARE_ACCESS_READ); 3867 if (filp)
3852 filp = find_readable_file(lock_stp->st_file); 3868 get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
3853 }
3854 file_lock.fl_type = F_RDLCK; 3869 file_lock.fl_type = F_RDLCK;
3855 cmd = F_SETLK; 3870 break;
3856 break;
3857 case NFS4_WRITE_LT: 3871 case NFS4_WRITE_LT:
3858 case NFS4_WRITEW_LT: 3872 case NFS4_WRITEW_LT:
3859 if (find_writeable_file(lock_stp->st_file)) { 3873 filp = find_writeable_file(lock_stp->st_file);
3860 nfs4_get_vfs_file(rqstp, fp, &cstate->current_fh, NFS4_SHARE_ACCESS_WRITE); 3874 if (filp)
3861 filp = find_writeable_file(lock_stp->st_file); 3875 get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
3862 }
3863 file_lock.fl_type = F_WRLCK; 3876 file_lock.fl_type = F_WRLCK;
3864 cmd = F_SETLK; 3877 break;
3865 break;
3866 default: 3878 default:
3867 status = nfserr_inval; 3879 status = nfserr_inval;
3868 goto out; 3880 goto out;
@@ -3886,7 +3898,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3886 * Note: locks.c uses the BKL to protect the inode's lock list. 3898 * Note: locks.c uses the BKL to protect the inode's lock list.
3887 */ 3899 */
3888 3900
3889 err = vfs_lock_file(filp, cmd, &file_lock, &conflock); 3901 err = vfs_lock_file(filp, F_SETLK, &file_lock, &conflock);
3890 switch (-err) { 3902 switch (-err) {
3891 case 0: /* success! */ 3903 case 0: /* success! */
3892 update_stateid(&lock_stp->st_stateid); 3904 update_stateid(&lock_stp->st_stateid);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 615f0a9f0600..c6766af00d98 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1142,7 +1142,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
1142 1142
1143 u32 dummy; 1143 u32 dummy;
1144 char *machine_name; 1144 char *machine_name;
1145 int i, j; 1145 int i;
1146 int nr_secflavs; 1146 int nr_secflavs;
1147 1147
1148 READ_BUF(16); 1148 READ_BUF(16);
@@ -1215,8 +1215,6 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
1215 READ_BUF(4); 1215 READ_BUF(4);
1216 READ32(dummy); 1216 READ32(dummy);
1217 READ_BUF(dummy * 4); 1217 READ_BUF(dummy * 4);
1218 for (j = 0; j < dummy; ++j)
1219 READ32(dummy);
1220 break; 1218 break;
1221 case RPC_AUTH_GSS: 1219 case RPC_AUTH_GSS:
1222 dprintk("RPC_AUTH_GSS callback secflavor " 1220 dprintk("RPC_AUTH_GSS callback secflavor "
@@ -1232,7 +1230,6 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
1232 READ_BUF(4); 1230 READ_BUF(4);
1233 READ32(dummy); 1231 READ32(dummy);
1234 READ_BUF(dummy); 1232 READ_BUF(dummy);
1235 p += XDR_QUADLEN(dummy);
1236 break; 1233 break;
1237 default: 1234 default:
1238 dprintk("Illegal callback secflavor\n"); 1235 dprintk("Illegal callback secflavor\n");
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 33b3e2b06779..1f5eae40f34e 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -12,13 +12,14 @@
12#include <linux/nfsd/syscall.h> 12#include <linux/nfsd/syscall.h>
13#include <linux/lockd/lockd.h> 13#include <linux/lockd/lockd.h>
14#include <linux/sunrpc/clnt.h> 14#include <linux/sunrpc/clnt.h>
15#include <linux/sunrpc/gss_api.h>
15 16
16#include "idmap.h" 17#include "idmap.h"
17#include "nfsd.h" 18#include "nfsd.h"
18#include "cache.h" 19#include "cache.h"
19 20
20/* 21/*
21 * We have a single directory with 9 nodes in it. 22 * We have a single directory with several nodes in it.
22 */ 23 */
23enum { 24enum {
24 NFSD_Root = 1, 25 NFSD_Root = 1,
@@ -42,6 +43,7 @@ enum {
42 NFSD_Versions, 43 NFSD_Versions,
43 NFSD_Ports, 44 NFSD_Ports,
44 NFSD_MaxBlkSize, 45 NFSD_MaxBlkSize,
46 NFSD_SupportedEnctypes,
45 /* 47 /*
46 * The below MUST come last. Otherwise we leave a hole in nfsd_files[] 48 * The below MUST come last. Otherwise we leave a hole in nfsd_files[]
47 * with !CONFIG_NFSD_V4 and simple_fill_super() goes oops 49 * with !CONFIG_NFSD_V4 and simple_fill_super() goes oops
@@ -187,6 +189,34 @@ static struct file_operations export_features_operations = {
187 .release = single_release, 189 .release = single_release,
188}; 190};
189 191
192#ifdef CONFIG_SUNRPC_GSS
193static int supported_enctypes_show(struct seq_file *m, void *v)
194{
195 struct gss_api_mech *k5mech;
196
197 k5mech = gss_mech_get_by_name("krb5");
198 if (k5mech == NULL)
199 goto out;
200 if (k5mech->gm_upcall_enctypes != NULL)
201 seq_printf(m, k5mech->gm_upcall_enctypes);
202 gss_mech_put(k5mech);
203out:
204 return 0;
205}
206
207static int supported_enctypes_open(struct inode *inode, struct file *file)
208{
209 return single_open(file, supported_enctypes_show, NULL);
210}
211
212static struct file_operations supported_enctypes_ops = {
213 .open = supported_enctypes_open,
214 .read = seq_read,
215 .llseek = seq_lseek,
216 .release = single_release,
217};
218#endif /* CONFIG_SUNRPC_GSS */
219
190extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); 220extern int nfsd_pool_stats_open(struct inode *inode, struct file *file);
191extern int nfsd_pool_stats_release(struct inode *inode, struct file *file); 221extern int nfsd_pool_stats_release(struct inode *inode, struct file *file);
192 222
@@ -1397,6 +1427,9 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
1397 [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, 1427 [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
1398 [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, 1428 [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
1399 [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, 1429 [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
1430#ifdef CONFIG_SUNRPC_GSS
1431 [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO},
1432#endif /* CONFIG_SUNRPC_GSS */
1400#ifdef CONFIG_NFSD_V4 1433#ifdef CONFIG_NFSD_V4
1401 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, 1434 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
1402 [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, 1435 [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
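
[editor's note] The new read-only supported_krb5_enctypes node lets user space (typically rpc.svcgssd) discover which Kerberos enctypes the kernel GSS code accepts before choosing session keys. A quick probe, assuming the nfsd filesystem is mounted at its conventional /proc/fs/nfsd location; note, as an aside, that the show routine passes gm_upcall_enctypes directly as the seq_printf format string, which is safe only as long as the enctype list never contains a '%':

#include <stdio.h>

int main(void)
{
        char buf[128];
        FILE *f = fopen("/proc/fs/nfsd/supported_krb5_enctypes", "r");

        if (!f) {
                perror("supported_krb5_enctypes");
                return 1;
        }
        if (fgets(buf, sizeof(buf), f))
                printf("krb5 enctypes: %s", buf);  /* e.g. "18,17,16,23,3,1,2" */
        fclose(f);
        return 0;
}
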
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 4ce005dbf3e6..65ec595e2226 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -451,7 +451,7 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p,
451 *p++ = htonl(resp->count); 451 *p++ = htonl(resp->count);
452 xdr_ressize_check(rqstp, p); 452 xdr_ressize_check(rqstp, p);
453 453
454 /* now update rqstp->rq_res to reflect data aswell */ 454 /* now update rqstp->rq_res to reflect data as well */
455 rqstp->rq_res.page_len = resp->count; 455 rqstp->rq_res.page_len = resp->count;
456 if (resp->count & 3) { 456 if (resp->count & 3) {
457 /* need to pad the tail */ 457 /* need to pad the tail */
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 2d31224b07bf..6bd2f3c21f2b 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -367,16 +367,12 @@ struct nfs4_file {
367 struct list_head fi_delegations; 367 struct list_head fi_delegations;
368 /* One each for O_RDONLY, O_WRONLY, O_RDWR: */ 368 /* One each for O_RDONLY, O_WRONLY, O_RDWR: */
369 struct file * fi_fds[3]; 369 struct file * fi_fds[3];
370 /* One each for O_RDONLY, O_WRONLY: */
371 atomic_t fi_access[2];
372 /* 370 /*
373 * Each open stateid contributes 1 to either fi_readers or 371 * Each open or lock stateid contributes 1 to either
374 * fi_writers, or both, depending on the open mode. A 372 * fi_access[O_RDONLY], fi_access[O_WRONLY], or both, depending
375 * delegation also takes an fi_readers reference. Lock 373 * on open or lock mode:
376 * stateid's take none.
377 */ 374 */
378 atomic_t fi_readers; 375 atomic_t fi_access[2];
379 atomic_t fi_writers;
380 struct file *fi_deleg_file; 376 struct file *fi_deleg_file;
381 struct file_lock *fi_lease; 377 struct file_lock *fi_lease;
382 atomic_t fi_delegees; 378 atomic_t fi_delegees;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ff93025ae2f7..129f3c9f62d5 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1363,7 +1363,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
1363 goto out; 1363 goto out;
1364 if (!(iap->ia_valid & ATTR_MODE)) 1364 if (!(iap->ia_valid & ATTR_MODE))
1365 iap->ia_mode = 0; 1365 iap->ia_mode = 0;
1366 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); 1366 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
1367 if (err) 1367 if (err)
1368 goto out; 1368 goto out;
1369 1369
@@ -1385,6 +1385,13 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
1385 if (IS_ERR(dchild)) 1385 if (IS_ERR(dchild))
1386 goto out_nfserr; 1386 goto out_nfserr;
1387 1387
1388 /* If file doesn't exist, check for permissions to create one */
1389 if (!dchild->d_inode) {
1390 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
1391 if (err)
1392 goto out;
1393 }
1394
1388 err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); 1395 err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
1389 if (err) 1396 if (err)
1390 goto out; 1397 goto out;
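
[editor's note] Paired with the earlier hunk that weakens the initial fh_verify() to NFSD_MAY_EXEC, the directory's create permission is now demanded only when the target really has to be created, so a CREATE that resolves to an existing file (a retransmitted or unchecked create, say) needs only lookup permission on the directory. The rule as a standalone sketch, names invented:

static int v3_create_permission(int dir_searchable, int dir_writable,
                                int target_exists)
{
        if (!dir_searchable)
                return -1;      /* fh_verify(..., NFSD_MAY_EXEC) failed */
        if (!target_exists && !dir_writable)
                return -1;      /* fh_verify(..., NFSD_MAY_CREATE) failed */
        return 0;               /* existing target: no write perm needed */
}
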
@@ -1749,8 +1756,6 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1749 if (host_err) 1756 if (host_err)
1750 goto out_drop_write; 1757 goto out_drop_write;
1751 } 1758 }
1752 if (host_err)
1753 goto out_drop_write;
1754 host_err = vfs_rename(fdir, odentry, tdir, ndentry); 1759 host_err = vfs_rename(fdir, odentry, tdir, ndentry);
1755 if (!host_err) { 1760 if (!host_err) {
1756 host_err = commit_metadata(tfhp); 1761 host_err = commit_metadata(tfhp);
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index d7fd696e595c..0a0a66d98cce 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -521,8 +521,8 @@ void nilfs_palloc_commit_free_entry(struct inode *inode,
521 group_offset, bitmap)) 521 group_offset, bitmap))
522 printk(KERN_WARNING "%s: entry number %llu already freed\n", 522 printk(KERN_WARNING "%s: entry number %llu already freed\n",
523 __func__, (unsigned long long)req->pr_entry_nr); 523 __func__, (unsigned long long)req->pr_entry_nr);
524 524 else
525 nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); 525 nilfs_palloc_group_desc_add_entries(inode, group, desc, 1);
526 526
527 kunmap(req->pr_bitmap_bh->b_page); 527 kunmap(req->pr_bitmap_bh->b_page);
528 kunmap(req->pr_desc_bh->b_page); 528 kunmap(req->pr_desc_bh->b_page);
@@ -558,8 +558,8 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode,
558 group_offset, bitmap)) 558 group_offset, bitmap))
559 printk(KERN_WARNING "%s: entry number %llu already freed\n", 559 printk(KERN_WARNING "%s: entry number %llu already freed\n",
560 __func__, (unsigned long long)req->pr_entry_nr); 560 __func__, (unsigned long long)req->pr_entry_nr);
561 561 else
562 nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); 562 nilfs_palloc_group_desc_add_entries(inode, group, desc, 1);
563 563
564 kunmap(req->pr_bitmap_bh->b_page); 564 kunmap(req->pr_bitmap_bh->b_page);
565 kunmap(req->pr_desc_bh->b_page); 565 kunmap(req->pr_desc_bh->b_page);
@@ -665,7 +665,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
665 for (j = i, n = 0; 665 for (j = i, n = 0;
666 (j < nitems) && nilfs_palloc_group_is_in(inode, group, 666 (j < nitems) && nilfs_palloc_group_is_in(inode, group,
667 entry_nrs[j]); 667 entry_nrs[j]);
668 j++, n++) { 668 j++) {
669 nilfs_palloc_group(inode, entry_nrs[j], &group_offset); 669 nilfs_palloc_group(inode, entry_nrs[j], &group_offset);
670 if (!nilfs_clear_bit_atomic( 670 if (!nilfs_clear_bit_atomic(
671 nilfs_mdt_bgl_lock(inode, group), 671 nilfs_mdt_bgl_lock(inode, group),
@@ -674,6 +674,8 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
674 "%s: entry number %llu already freed\n", 674 "%s: entry number %llu already freed\n",
675 __func__, 675 __func__,
676 (unsigned long long)entry_nrs[j]); 676 (unsigned long long)entry_nrs[j]);
677 } else {
678 n++;
677 } 679 }
678 } 680 }
679 nilfs_palloc_group_desc_add_entries(inode, group, desc, n); 681 nilfs_palloc_group_desc_add_entries(inode, group, desc, n);
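
[editor's note] All three hunks in this file fix the same accounting bug: the group descriptor's free-entry counter was adjusted even when the bitmap bit turned out to be already clear, so a double free inflated the count. The single-entry paths gain an else, and the batch path in nilfs_palloc_freev() now increments n only on a real clear. The batch loop, condensed into a standalone function:

#include <limits.h>

#define BITS_PER_LONG_MODEL (sizeof(unsigned long) * CHAR_BIT)

static int free_entries_model(unsigned long *bitmap, const unsigned int *nrs,
                              int nitems)
{
        int j, n = 0;

        for (j = 0; j < nitems; j++) {
                unsigned long *word = &bitmap[nrs[j] / BITS_PER_LONG_MODEL];
                unsigned long mask = 1UL << (nrs[j] % BITS_PER_LONG_MODEL);

                if (*word & mask) {
                        *word &= ~mask;
                        n++;            /* count only bits actually cleared */
                }
                /* else: "entry already freed" is logged, not counted */
        }
        return n;                       /* passed to group_desc_add_entries() */
}
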
diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h
index 9af34a7e6e13..f5fde36b9e28 100644
--- a/fs/nilfs2/alloc.h
+++ b/fs/nilfs2/alloc.h
@@ -74,7 +74,7 @@ int nilfs_palloc_freev(struct inode *, __u64 *, size_t);
74 74
75#define nilfs_set_bit_atomic ext2_set_bit_atomic 75#define nilfs_set_bit_atomic ext2_set_bit_atomic
76#define nilfs_clear_bit_atomic ext2_clear_bit_atomic 76#define nilfs_clear_bit_atomic ext2_clear_bit_atomic
77#define nilfs_find_next_zero_bit ext2_find_next_zero_bit 77#define nilfs_find_next_zero_bit find_next_zero_bit_le
78 78
79/* 79/*
80 * persistent object allocator cache 80 * persistent object allocator cache
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 3ee67c67cc52..4723f04e9b12 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -25,7 +25,6 @@
25#include <linux/errno.h> 25#include <linux/errno.h>
26#include "nilfs.h" 26#include "nilfs.h"
27#include "bmap.h" 27#include "bmap.h"
28#include "sb.h"
29#include "btree.h" 28#include "btree.h"
30#include "direct.h" 29#include "direct.h"
31#include "btnode.h" 30#include "btnode.h"
@@ -425,17 +424,6 @@ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap)
425/* 424/*
426 * Internal use only 425 * Internal use only
427 */ 426 */
428
429void nilfs_bmap_add_blocks(const struct nilfs_bmap *bmap, int n)
430{
431 inode_add_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n);
432}
433
434void nilfs_bmap_sub_blocks(const struct nilfs_bmap *bmap, int n)
435{
436 inode_sub_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n);
437}
438
439__u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap, 427__u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap,
440 const struct buffer_head *bh) 428 const struct buffer_head *bh)
441{ 429{
diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h
index bde1c0aa2e15..40d9f453d31c 100644
--- a/fs/nilfs2/bmap.h
+++ b/fs/nilfs2/bmap.h
@@ -240,9 +240,6 @@ __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *,
240__u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *, __u64); 240__u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *, __u64);
241__u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *); 241__u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *);
242 242
243void nilfs_bmap_add_blocks(const struct nilfs_bmap *, int);
244void nilfs_bmap_sub_blocks(const struct nilfs_bmap *, int);
245
246 243
247/* Assume that bmap semaphore is locked. */ 244/* Assume that bmap semaphore is locked. */
248static inline int nilfs_bmap_dirty(const struct nilfs_bmap *bmap) 245static inline int nilfs_bmap_dirty(const struct nilfs_bmap *bmap)
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 85f7baa15f5d..609cd223eea8 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -34,15 +34,10 @@
34#include "page.h" 34#include "page.h"
35#include "btnode.h" 35#include "btnode.h"
36 36
37
38static const struct address_space_operations def_btnode_aops = {
39 .sync_page = block_sync_page,
40};
41
42void nilfs_btnode_cache_init(struct address_space *btnc, 37void nilfs_btnode_cache_init(struct address_space *btnc,
43 struct backing_dev_info *bdi) 38 struct backing_dev_info *bdi)
44{ 39{
45 nilfs_mapping_init(btnc, bdi, &def_btnode_aops); 40 nilfs_mapping_init(btnc, bdi);
46} 41}
47 42
48void nilfs_btnode_cache_clear(struct address_space *btnc) 43void nilfs_btnode_cache_clear(struct address_space *btnc)
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 300c2bc00c3f..d451ae0e0bf3 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -1174,7 +1174,7 @@ static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1174 if (ret < 0) 1174 if (ret < 0)
1175 goto out; 1175 goto out;
1176 nilfs_btree_commit_insert(btree, path, level, key, ptr); 1176 nilfs_btree_commit_insert(btree, path, level, key, ptr);
1177 nilfs_bmap_add_blocks(btree, stats.bs_nblocks); 1177 nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1178 1178
1179 out: 1179 out:
1180 nilfs_btree_free_path(path); 1180 nilfs_btree_free_path(path);
@@ -1511,7 +1511,7 @@ static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1511 if (ret < 0) 1511 if (ret < 0)
1512 goto out; 1512 goto out;
1513 nilfs_btree_commit_delete(btree, path, level, dat); 1513 nilfs_btree_commit_delete(btree, path, level, dat);
1514 nilfs_bmap_sub_blocks(btree, stats.bs_nblocks); 1514 nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks);
1515 1515
1516out: 1516out:
1517 nilfs_btree_free_path(path); 1517 nilfs_btree_free_path(path);
@@ -1776,7 +1776,7 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1776 return ret; 1776 return ret;
1777 nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n, 1777 nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1778 di, ni, bh); 1778 di, ni, bh);
1779 nilfs_bmap_add_blocks(btree, stats.bs_nblocks); 1779 nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1780 return 0; 1780 return 0;
1781} 1781}
1782 1782
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index 9d45773b79e6..3a1923943b14 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -440,7 +440,6 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
440 nilfs_commit_chunk(page, mapping, from, to); 440 nilfs_commit_chunk(page, mapping, from, to);
441 nilfs_put_page(page); 441 nilfs_put_page(page);
442 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 442 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
443/* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */
444} 443}
445 444
446/* 445/*
@@ -531,7 +530,6 @@ got_it:
531 nilfs_set_de_type(de, inode); 530 nilfs_set_de_type(de, inode);
532 nilfs_commit_chunk(page, page->mapping, from, to); 531 nilfs_commit_chunk(page, page->mapping, from, to);
533 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 532 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
534/* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */
535 nilfs_mark_inode_dirty(dir); 533 nilfs_mark_inode_dirty(dir);
536 /* OFFSET_CACHE */ 534 /* OFFSET_CACHE */
537out_put: 535out_put:
@@ -579,7 +577,6 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page)
579 dir->inode = 0; 577 dir->inode = 0;
580 nilfs_commit_chunk(page, mapping, from, to); 578 nilfs_commit_chunk(page, mapping, from, to);
581 inode->i_ctime = inode->i_mtime = CURRENT_TIME; 579 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
582/* NILFS_I(inode)->i_flags &= ~NILFS_BTREE_FL; */
583out: 580out:
584 nilfs_put_page(page); 581 nilfs_put_page(page);
585 return err; 582 return err;
@@ -684,7 +681,7 @@ const struct file_operations nilfs_dir_operations = {
684 .readdir = nilfs_readdir, 681 .readdir = nilfs_readdir,
685 .unlocked_ioctl = nilfs_ioctl, 682 .unlocked_ioctl = nilfs_ioctl,
686#ifdef CONFIG_COMPAT 683#ifdef CONFIG_COMPAT
687 .compat_ioctl = nilfs_ioctl, 684 .compat_ioctl = nilfs_compat_ioctl,
688#endif /* CONFIG_COMPAT */ 685#endif /* CONFIG_COMPAT */
689 .fsync = nilfs_sync_file, 686 .fsync = nilfs_sync_file,
690 687
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
index 324d80c57518..82f4865e86dd 100644
--- a/fs/nilfs2/direct.c
+++ b/fs/nilfs2/direct.c
@@ -146,7 +146,7 @@ static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
146 if (NILFS_BMAP_USE_VBN(bmap)) 146 if (NILFS_BMAP_USE_VBN(bmap))
147 nilfs_bmap_set_target_v(bmap, key, req.bpr_ptr); 147 nilfs_bmap_set_target_v(bmap, key, req.bpr_ptr);
148 148
149 nilfs_bmap_add_blocks(bmap, 1); 149 nilfs_inode_add_blocks(bmap->b_inode, 1);
150 } 150 }
151 return ret; 151 return ret;
152} 152}
@@ -168,7 +168,7 @@ static int nilfs_direct_delete(struct nilfs_bmap *bmap, __u64 key)
168 if (!ret) { 168 if (!ret) {
169 nilfs_bmap_commit_end_ptr(bmap, &req, dat); 169 nilfs_bmap_commit_end_ptr(bmap, &req, dat);
170 nilfs_direct_set_ptr(bmap, key, NILFS_BMAP_INVALID_PTR); 170 nilfs_direct_set_ptr(bmap, key, NILFS_BMAP_INVALID_PTR);
171 nilfs_bmap_sub_blocks(bmap, 1); 171 nilfs_inode_sub_blocks(bmap->b_inode, 1);
172 } 172 }
173 return ret; 173 return ret;
174} 174}
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 2f560c9fb808..397e73258631 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -59,7 +59,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
59 struct nilfs_transaction_info ti; 59 struct nilfs_transaction_info ti;
60 int ret; 60 int ret;
61 61
62 if (unlikely(nilfs_near_disk_full(NILFS_SB(inode->i_sb)->s_nilfs))) 62 if (unlikely(nilfs_near_disk_full(inode->i_sb->s_fs_info)))
63 return VM_FAULT_SIGBUS; /* -ENOSPC */ 63 return VM_FAULT_SIGBUS; /* -ENOSPC */
64 64
65 lock_page(page); 65 lock_page(page);
@@ -72,10 +72,9 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
72 /* 72 /*
73 * check to see if the page is mapped already (no holes) 73 * check to see if the page is mapped already (no holes)
74 */ 74 */
75 if (PageMappedToDisk(page)) { 75 if (PageMappedToDisk(page))
76 unlock_page(page);
77 goto mapped; 76 goto mapped;
78 } 77
79 if (page_has_buffers(page)) { 78 if (page_has_buffers(page)) {
80 struct buffer_head *bh, *head; 79 struct buffer_head *bh, *head;
81 int fully_mapped = 1; 80 int fully_mapped = 1;
@@ -90,7 +89,6 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
90 89
91 if (fully_mapped) { 90 if (fully_mapped) {
92 SetPageMappedToDisk(page); 91 SetPageMappedToDisk(page);
93 unlock_page(page);
94 goto mapped; 92 goto mapped;
95 } 93 }
96 } 94 }
@@ -105,16 +103,17 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
105 return VM_FAULT_SIGBUS; 103 return VM_FAULT_SIGBUS;
106 104
107 ret = block_page_mkwrite(vma, vmf, nilfs_get_block); 105 ret = block_page_mkwrite(vma, vmf, nilfs_get_block);
108 if (unlikely(ret)) { 106 if (ret != VM_FAULT_LOCKED) {
109 nilfs_transaction_abort(inode->i_sb); 107 nilfs_transaction_abort(inode->i_sb);
110 return ret; 108 return ret;
111 } 109 }
110 nilfs_set_file_dirty(inode, 1 << (PAGE_SHIFT - inode->i_blkbits));
112 nilfs_transaction_commit(inode->i_sb); 111 nilfs_transaction_commit(inode->i_sb);
113 112
114 mapped: 113 mapped:
115 SetPageChecked(page); 114 SetPageChecked(page);
116 wait_on_page_writeback(page); 115 wait_on_page_writeback(page);
117 return 0; 116 return VM_FAULT_LOCKED;
118} 117}
119 118
120static const struct vm_operations_struct nilfs_file_vm_ops = { 119static const struct vm_operations_struct nilfs_file_vm_ops = {
@@ -142,7 +141,7 @@ const struct file_operations nilfs_file_operations = {
142 .aio_write = generic_file_aio_write, 141 .aio_write = generic_file_aio_write,
143 .unlocked_ioctl = nilfs_ioctl, 142 .unlocked_ioctl = nilfs_ioctl,
144#ifdef CONFIG_COMPAT 143#ifdef CONFIG_COMPAT
145 .compat_ioctl = nilfs_ioctl, 144 .compat_ioctl = nilfs_compat_ioctl,
146#endif /* CONFIG_COMPAT */ 145#endif /* CONFIG_COMPAT */
147 .mmap = nilfs_file_mmap, 146 .mmap = nilfs_file_mmap,
148 .open = generic_file_open, 147 .open = generic_file_open,
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index caf9a6a3fb54..1c2a3e23f8b2 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -49,7 +49,6 @@
49#include "ifile.h" 49#include "ifile.h"
50 50
51static const struct address_space_operations def_gcinode_aops = { 51static const struct address_space_operations def_gcinode_aops = {
52 .sync_page = block_sync_page,
53}; 52};
54 53
55/* 54/*
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 2fd440d8d6b8..c0aa27490c02 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -41,6 +41,24 @@ struct nilfs_iget_args {
41 int for_gc; 41 int for_gc;
42}; 42};
43 43
44void nilfs_inode_add_blocks(struct inode *inode, int n)
45{
46 struct nilfs_root *root = NILFS_I(inode)->i_root;
47
48 inode_add_bytes(inode, (1 << inode->i_blkbits) * n);
49 if (root)
50 atomic_add(n, &root->blocks_count);
51}
52
53void nilfs_inode_sub_blocks(struct inode *inode, int n)
54{
55 struct nilfs_root *root = NILFS_I(inode)->i_root;
56
57 inode_sub_bytes(inode, (1 << inode->i_blkbits) * n);
58 if (root)
59 atomic_sub(n, &root->blocks_count);
60}
61
44/** 62/**
45 * nilfs_get_block() - get a file block on the filesystem (callback function) 63 * nilfs_get_block() - get a file block on the filesystem (callback function)
46 * @inode - inode struct of the target file 64 * @inode - inode struct of the target file
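
[editor's note] These two helpers replace nilfs_bmap_add_blocks()/nilfs_bmap_sub_blocks() (deleted from bmap.c below, with the btree and direct mappers converted to call them): besides the inode's byte count, they keep the per-checkpoint root's blocks_count current so each snapshot can report its own usage. A compilable model with C11 atomics in place of atomic_t; the NULL check mirrors the diff (some internal inodes carry no root):

#include <stdatomic.h>

struct root_model  { _Atomic long blocks_count; };
struct inode_model { long long bytes; unsigned int blkbits;
                     struct root_model *root; };

static void inode_add_blocks_model(struct inode_model *inode, int n)
{
        inode->bytes += (1LL << inode->blkbits) * n;   /* inode_add_bytes() */
        if (inode->root)
                atomic_fetch_add(&inode->root->blocks_count, n);
}

static void inode_sub_blocks_model(struct inode_model *inode, int n)
{
        inode->bytes -= (1LL << inode->blkbits) * n;   /* inode_sub_bytes() */
        if (inode->root)
                atomic_fetch_sub(&inode->root->blocks_count, n);
}
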
@@ -262,7 +280,6 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
262const struct address_space_operations nilfs_aops = { 280const struct address_space_operations nilfs_aops = {
263 .writepage = nilfs_writepage, 281 .writepage = nilfs_writepage,
264 .readpage = nilfs_readpage, 282 .readpage = nilfs_readpage,
265 .sync_page = block_sync_page,
266 .writepages = nilfs_writepages, 283 .writepages = nilfs_writepages,
267 .set_page_dirty = nilfs_set_page_dirty, 284 .set_page_dirty = nilfs_set_page_dirty,
268 .readpages = nilfs_readpages, 285 .readpages = nilfs_readpages,
@@ -277,7 +294,7 @@ const struct address_space_operations nilfs_aops = {
277struct inode *nilfs_new_inode(struct inode *dir, int mode) 294struct inode *nilfs_new_inode(struct inode *dir, int mode)
278{ 295{
279 struct super_block *sb = dir->i_sb; 296 struct super_block *sb = dir->i_sb;
280 struct nilfs_sb_info *sbi = NILFS_SB(sb); 297 struct the_nilfs *nilfs = sb->s_fs_info;
281 struct inode *inode; 298 struct inode *inode;
282 struct nilfs_inode_info *ii; 299 struct nilfs_inode_info *ii;
283 struct nilfs_root *root; 300 struct nilfs_root *root;
@@ -315,19 +332,16 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode)
315 /* No lock is needed; iget() ensures it. */ 332 /* No lock is needed; iget() ensures it. */
316 } 333 }
317 334
318 ii->i_flags = NILFS_I(dir)->i_flags; 335 ii->i_flags = nilfs_mask_flags(
319 if (S_ISLNK(mode)) 336 mode, NILFS_I(dir)->i_flags & NILFS_FL_INHERITED);
320 ii->i_flags &= ~(NILFS_IMMUTABLE_FL | NILFS_APPEND_FL);
321 if (!S_ISDIR(mode))
322 ii->i_flags &= ~NILFS_DIRSYNC_FL;
323 337
324 /* ii->i_file_acl = 0; */ 338 /* ii->i_file_acl = 0; */
325 /* ii->i_dir_acl = 0; */ 339 /* ii->i_dir_acl = 0; */
326 ii->i_dir_start_lookup = 0; 340 ii->i_dir_start_lookup = 0;
327 nilfs_set_inode_flags(inode); 341 nilfs_set_inode_flags(inode);
328 spin_lock(&sbi->s_next_gen_lock); 342 spin_lock(&nilfs->ns_next_gen_lock);
329 inode->i_generation = sbi->s_next_generation++; 343 inode->i_generation = nilfs->ns_next_generation++;
330 spin_unlock(&sbi->s_next_gen_lock); 344 spin_unlock(&nilfs->ns_next_gen_lock);
331 insert_inode_hash(inode); 345 insert_inode_hash(inode);
332 346
333 err = nilfs_init_acl(inode, dir); 347 err = nilfs_init_acl(inode, dir);
@@ -359,17 +373,15 @@ void nilfs_set_inode_flags(struct inode *inode)
 
 	inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME |
 			    S_DIRSYNC);
-	if (flags & NILFS_SYNC_FL)
+	if (flags & FS_SYNC_FL)
 		inode->i_flags |= S_SYNC;
-	if (flags & NILFS_APPEND_FL)
+	if (flags & FS_APPEND_FL)
 		inode->i_flags |= S_APPEND;
-	if (flags & NILFS_IMMUTABLE_FL)
+	if (flags & FS_IMMUTABLE_FL)
 		inode->i_flags |= S_IMMUTABLE;
-#ifndef NILFS_ATIME_DISABLE
-	if (flags & NILFS_NOATIME_FL)
-#endif
+	if (flags & FS_NOATIME_FL)
 		inode->i_flags |= S_NOATIME;
-	if (flags & NILFS_DIRSYNC_FL)
+	if (flags & FS_DIRSYNC_FL)
 		inode->i_flags |= S_DIRSYNC;
 	mapping_set_gfp_mask(inode->i_mapping,
 			     mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
@@ -420,7 +432,7 @@ static int __nilfs_read_inode(struct super_block *sb,
 			      struct nilfs_root *root, unsigned long ino,
 			      struct inode *inode)
 {
-	struct the_nilfs *nilfs = NILFS_SB(sb)->s_nilfs;
+	struct the_nilfs *nilfs = sb->s_fs_info;
 	struct buffer_head *bh;
 	struct nilfs_inode *raw_inode;
 	int err;
@@ -707,6 +719,7 @@ void nilfs_evict_inode(struct inode *inode)
 	struct nilfs_transaction_info ti;
 	struct super_block *sb = inode->i_sb;
 	struct nilfs_inode_info *ii = NILFS_I(inode);
+	int ret;
 
 	if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) {
 		if (inode->i_data.nrpages)
@@ -725,8 +738,9 @@ void nilfs_evict_inode(struct inode *inode)
 	nilfs_mark_inode_dirty(inode);
 	end_writeback(inode);
 
-	nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino);
-	atomic_dec(&ii->i_root->inodes_count);
+	ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino);
+	if (!ret)
+		atomic_dec(&ii->i_root->inodes_count);
 
 	nilfs_clear_inode(inode);
 
@@ -792,18 +806,18 @@ int nilfs_permission(struct inode *inode, int mask, unsigned int flags)
 
 int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
 {
-	struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb);
+	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
 	struct nilfs_inode_info *ii = NILFS_I(inode);
 	int err;
 
-	spin_lock(&sbi->s_inode_lock);
+	spin_lock(&nilfs->ns_inode_lock);
 	if (ii->i_bh == NULL) {
-		spin_unlock(&sbi->s_inode_lock);
+		spin_unlock(&nilfs->ns_inode_lock);
 		err = nilfs_ifile_get_inode_block(ii->i_root->ifile,
 						  inode->i_ino, pbh);
 		if (unlikely(err))
 			return err;
-		spin_lock(&sbi->s_inode_lock);
+		spin_lock(&nilfs->ns_inode_lock);
 		if (ii->i_bh == NULL)
 			ii->i_bh = *pbh;
 		else {
@@ -814,36 +828,36 @@ int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
 		*pbh = ii->i_bh;
 
 	get_bh(*pbh);
-	spin_unlock(&sbi->s_inode_lock);
+	spin_unlock(&nilfs->ns_inode_lock);
 	return 0;
 }
 
 int nilfs_inode_dirty(struct inode *inode)
 {
 	struct nilfs_inode_info *ii = NILFS_I(inode);
-	struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb);
+	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
 	int ret = 0;
 
 	if (!list_empty(&ii->i_dirty)) {
-		spin_lock(&sbi->s_inode_lock);
+		spin_lock(&nilfs->ns_inode_lock);
 		ret = test_bit(NILFS_I_DIRTY, &ii->i_state) ||
 			test_bit(NILFS_I_BUSY, &ii->i_state);
-		spin_unlock(&sbi->s_inode_lock);
+		spin_unlock(&nilfs->ns_inode_lock);
 	}
 	return ret;
 }
 
 int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty)
 {
-	struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb);
 	struct nilfs_inode_info *ii = NILFS_I(inode);
+	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
 
-	atomic_add(nr_dirty, &sbi->s_nilfs->ns_ndirtyblks);
+	atomic_add(nr_dirty, &nilfs->ns_ndirtyblks);
 
 	if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state))
 		return 0;
 
-	spin_lock(&sbi->s_inode_lock);
+	spin_lock(&nilfs->ns_inode_lock);
 	if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
 	    !test_bit(NILFS_I_BUSY, &ii->i_state)) {
 		/* Because this routine may race with nilfs_dispose_list(),
@@ -851,18 +865,18 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty)
 		if (list_empty(&ii->i_dirty) && igrab(inode) == NULL) {
 			/* This will happen when somebody is freeing
 			   this inode. */
-			nilfs_warning(sbi->s_super, __func__,
+			nilfs_warning(inode->i_sb, __func__,
 				      "cannot get inode (ino=%lu)\n",
 				      inode->i_ino);
-			spin_unlock(&sbi->s_inode_lock);
+			spin_unlock(&nilfs->ns_inode_lock);
 			return -EINVAL; /* NILFS_I_DIRTY may remain for
 					   freeing inode */
 		}
 		list_del(&ii->i_dirty);
-		list_add_tail(&ii->i_dirty, &sbi->s_dirty_files);
+		list_add_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
 		set_bit(NILFS_I_QUEUED, &ii->i_state);
 	}
-	spin_unlock(&sbi->s_inode_lock);
+	spin_unlock(&nilfs->ns_inode_lock);
 	return 0;
 }
 
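
Note (not part of the patch): the nilfs_inode_add_blocks()/nilfs_inode_sub_blocks() helpers added above keep two counters in step — the inode's byte usage, via inode_add_bytes()/inode_sub_bytes(), and the per-checkpoint-root block count. A self-contained sketch of the same arithmetic, using hypothetical stand-in types (demo_root, demo_inode) in place of the kernel's nilfs_root and atomic counters:

#include <stdio.h>

/* Stand-ins for illustration only; not the kernel structures. */
struct demo_root  { long blocks_count; };
struct demo_inode { unsigned int i_blkbits; long i_bytes; struct demo_root *root; };

static void demo_add_blocks(struct demo_inode *inode, int n)
{
	/* bytes = blocks * block size, where block size = 1 << i_blkbits */
	inode->i_bytes += (1L << inode->i_blkbits) * n;
	if (inode->root)
		inode->root->blocks_count += n;	/* usage charged to the root */
}

int main(void)
{
	struct demo_root r = { 0 };
	struct demo_inode i = { .i_blkbits = 12, .i_bytes = 0, .root = &r };

	demo_add_blocks(&i, 3);
	printf("%ld bytes, %ld blocks\n", i.i_bytes, r.blocks_count);
	/* prints: 12288 bytes, 3 blocks (3 * 4096-byte blocks) */
	return 0;
}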
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 496738963fdb..f2469ba6246b 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -26,7 +26,9 @@
 #include <linux/capability.h>	/* capable() */
 #include <linux/uaccess.h>	/* copy_from_user(), copy_to_user() */
 #include <linux/vmalloc.h>
+#include <linux/compat.h>	/* compat_ptr() */
 #include <linux/mount.h>	/* mnt_want_write(), mnt_drop_write() */
+#include <linux/buffer_head.h>
 #include <linux/nilfs2_fs.h>
 #include "nilfs.h"
 #include "segment.h"
@@ -97,11 +99,74 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs,
 	return ret;
 }
 
+static int nilfs_ioctl_getflags(struct inode *inode, void __user *argp)
+{
+	unsigned int flags = NILFS_I(inode)->i_flags & FS_FL_USER_VISIBLE;
+
+	return put_user(flags, (int __user *)argp);
+}
+
+static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp,
+				void __user *argp)
+{
+	struct nilfs_transaction_info ti;
+	unsigned int flags, oldflags;
+	int ret;
+
+	if (!inode_owner_or_capable(inode))
+		return -EACCES;
+
+	if (get_user(flags, (int __user *)argp))
+		return -EFAULT;
+
+	ret = mnt_want_write(filp->f_path.mnt);
+	if (ret)
+		return ret;
+
+	flags = nilfs_mask_flags(inode->i_mode, flags);
+
+	mutex_lock(&inode->i_mutex);
+
+	oldflags = NILFS_I(inode)->i_flags;
+
+	/*
+	 * The IMMUTABLE and APPEND_ONLY flags can only be changed by the
+	 * relevant capability.
+	 */
+	ret = -EPERM;
+	if (((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) &&
+	    !capable(CAP_LINUX_IMMUTABLE))
+		goto out;
+
+	ret = nilfs_transaction_begin(inode->i_sb, &ti, 0);
+	if (ret)
+		goto out;
+
+	NILFS_I(inode)->i_flags = (oldflags & ~FS_FL_USER_MODIFIABLE) |
+		(flags & FS_FL_USER_MODIFIABLE);
+
+	nilfs_set_inode_flags(inode);
+	inode->i_ctime = CURRENT_TIME;
+	if (IS_SYNC(inode))
+		nilfs_set_transaction_flag(NILFS_TI_SYNC);
+
+	nilfs_mark_inode_dirty(inode);
+	ret = nilfs_transaction_commit(inode->i_sb);
+out:
+	mutex_unlock(&inode->i_mutex);
+	mnt_drop_write(filp->f_path.mnt);
+	return ret;
+}
+
+static int nilfs_ioctl_getversion(struct inode *inode, void __user *argp)
+{
+	return put_user(inode->i_generation, (int __user *)argp);
+}
+
 static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp,
 				     unsigned int cmd, void __user *argp)
 {
-	struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs;
-	struct inode *cpfile = nilfs->ns_cpfile;
+	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
 	struct nilfs_transaction_info ti;
 	struct nilfs_cpmode cpmode;
 	int ret;
@@ -121,7 +186,7 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp,
 
 	nilfs_transaction_begin(inode->i_sb, &ti, 0);
 	ret = nilfs_cpfile_change_cpmode(
-		cpfile, cpmode.cm_cno, cpmode.cm_mode);
+		nilfs->ns_cpfile, cpmode.cm_cno, cpmode.cm_mode);
 	if (unlikely(ret < 0))
 		nilfs_transaction_abort(inode->i_sb);
 	else
@@ -137,7 +202,7 @@ static int
 nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp,
 			      unsigned int cmd, void __user *argp)
 {
-	struct inode *cpfile = NILFS_SB(inode->i_sb)->s_nilfs->ns_cpfile;
+	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
 	struct nilfs_transaction_info ti;
 	__u64 cno;
 	int ret;
@@ -154,7 +219,7 @@ nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp,
 		goto out;
 
 	nilfs_transaction_begin(inode->i_sb, &ti, 0);
-	ret = nilfs_cpfile_delete_checkpoint(cpfile, cno);
+	ret = nilfs_cpfile_delete_checkpoint(nilfs->ns_cpfile, cno);
 	if (unlikely(ret < 0))
 		nilfs_transaction_abort(inode->i_sb);
 	else
@@ -180,7 +245,7 @@ nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
 static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp,
 				  unsigned int cmd, void __user *argp)
 {
-	struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs;
+	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
 	struct nilfs_cpstat cpstat;
 	int ret;
 
@@ -211,7 +276,7 @@ nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
 static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp,
 				  unsigned int cmd, void __user *argp)
 {
-	struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs;
+	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
 	struct nilfs_sustat sustat;
 	int ret;
 
@@ -267,7 +332,7 @@ nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags,
 static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp,
 				  unsigned int cmd, void __user *argp)
 {
-	struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs;
+	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
 	struct nilfs_argv argv;
 	int ret;
 
@@ -336,7 +401,7 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb,
 				   struct nilfs_argv *argv, void *buf)
 {
 	size_t nmembs = argv->v_nmembs;
-	struct the_nilfs *nilfs = NILFS_SB(sb)->s_nilfs;
+	struct the_nilfs *nilfs = sb->s_fs_info;
 	struct inode *inode;
 	struct nilfs_vdesc *vdesc;
 	struct buffer_head *bh, *n;
@@ -550,7 +615,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
 		ret = PTR_ERR(kbufs[4]);
 		goto out;
 	}
-	nilfs = NILFS_SB(inode->i_sb)->s_nilfs;
+	nilfs = inode->i_sb->s_fs_info;
 
 	for (n = 0; n < 4; n++) {
 		ret = -EINVAL;
@@ -623,7 +688,7 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp,
 		return ret;
 
 	if (argp != NULL) {
-		nilfs = NILFS_SB(inode->i_sb)->s_nilfs;
+		nilfs = inode->i_sb->s_fs_info;
 		down_read(&nilfs->ns_segctor_sem);
 		cno = nilfs->ns_cno - 1;
 		up_read(&nilfs->ns_segctor_sem);
@@ -641,7 +706,7 @@ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp,
 				      void *, size_t, size_t))
 
 {
-	struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs;
+	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
 	struct nilfs_argv argv;
 	int ret;
 
@@ -666,6 +731,12 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	void __user *argp = (void __user *)arg;
 
 	switch (cmd) {
+	case FS_IOC_GETFLAGS:
+		return nilfs_ioctl_getflags(inode, argp);
+	case FS_IOC_SETFLAGS:
+		return nilfs_ioctl_setflags(inode, filp, argp);
+	case FS_IOC_GETVERSION:
+		return nilfs_ioctl_getversion(inode, argp);
 	case NILFS_IOCTL_CHANGE_CPMODE:
 		return nilfs_ioctl_change_cpmode(inode, filp, cmd, argp);
 	case NILFS_IOCTL_DELETE_CHECKPOINT:
@@ -696,3 +767,23 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		return -ENOTTY;
 	}
 }
+
+#ifdef CONFIG_COMPAT
+long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	switch (cmd) {
+	case FS_IOC32_GETFLAGS:
+		cmd = FS_IOC_GETFLAGS;
+		break;
+	case FS_IOC32_SETFLAGS:
+		cmd = FS_IOC_SETFLAGS;
+		break;
+	case FS_IOC32_GETVERSION:
+		cmd = FS_IOC_GETVERSION;
+		break;
+	default:
+		return -ENOIOCTLCMD;
+	}
+	return nilfs_ioctl(filp, cmd, (unsigned long)compat_ptr(arg));
+}
+#endif
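
Note (not part of the patch): with the hunks above, nilfs2 answers the generic attribute ioctls that chattr(1) and lsattr(1) issue, plus their 32-bit compat variants. A minimal userspace sketch of exercising the new entry points — error handling abbreviated, flag constants from <linux/fs.h>:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/fs.h>	/* FS_IOC_GETFLAGS, FS_IOC_SETFLAGS, FS_NOATIME_FL */

int main(int argc, char **argv)
{
	int fd, flags;

	if (argc < 2)
		return 2;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0 || ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0) {
		perror(argv[1]);
		return 1;
	}
	printf("flags: %#x\n", flags);

	flags |= FS_NOATIME_FL;			/* equivalent to chattr +A */
	if (ioctl(fd, FS_IOC_SETFLAGS, &flags) < 0)
		perror("FS_IOC_SETFLAGS");	/* fails unless owner or capable */
	return 0;
}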
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index a0babd2bff6a..a649b05f7069 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -399,7 +399,6 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
 
 static const struct address_space_operations def_mdt_aops = {
 	.writepage = nilfs_mdt_write_page,
-	.sync_page = block_sync_page,
 };
 
 static const struct inode_operations def_mdt_iops;
@@ -438,10 +437,6 @@ void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size,
 	mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size);
 }
 
-static const struct address_space_operations shadow_map_aops = {
-	.sync_page = block_sync_page,
-};
-
 /**
  * nilfs_mdt_setup_shadow_map - setup shadow map and bind it to metadata file
  * @inode: inode of the metadata file
@@ -455,9 +450,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
 
 	INIT_LIST_HEAD(&shadow->frozen_buffers);
 	address_space_init_once(&shadow->frozen_data);
-	nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops);
+	nilfs_mapping_init(&shadow->frozen_data, bdi);
 	address_space_init_once(&shadow->frozen_btnodes);
-	nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops);
+	nilfs_mapping_init(&shadow->frozen_btnodes, bdi);
 	mi->mi_shadow = shadow;
 	return 0;
 }
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h
index b13734bf3521..ed68563ec708 100644
--- a/fs/nilfs2/mdt.h
+++ b/fs/nilfs2/mdt.h
@@ -66,7 +66,7 @@ static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode)
 
 static inline struct the_nilfs *NILFS_I_NILFS(struct inode *inode)
 {
-	return NILFS_SB(inode->i_sb)->s_nilfs;
+	return inode->i_sb->s_fs_info;
 }
 
 /* Default GFP flags using highmem */
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 161791d26458..546849b3e88f 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -482,7 +482,7 @@ static struct dentry *nilfs_get_dentry(struct super_block *sb, u64 cno,
 	if (ino < NILFS_FIRST_INO(sb) && ino != NILFS_ROOT_INO)
 		return ERR_PTR(-ESTALE);
 
-	root = nilfs_lookup_root(NILFS_SB(sb)->s_nilfs, cno);
+	root = nilfs_lookup_root(sb->s_fs_info, cno);
 	if (!root)
 		return ERR_PTR(-ESTALE);
 
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 777e8fd04304..a8dd344303cb 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -30,7 +30,6 @@
 #include <linux/blkdev.h>
 #include <linux/nilfs2_fs.h>
 #include "the_nilfs.h"
-#include "sb.h"
 #include "bmap.h"
 
 /*
@@ -115,19 +114,19 @@ enum {
  * Macros to check inode numbers
  */
 #define NILFS_MDT_INO_BITS   \
 	((unsigned int)(1 << NILFS_DAT_INO | 1 << NILFS_CPFILE_INO |	\
 			1 << NILFS_SUFILE_INO | 1 << NILFS_IFILE_INO |	\
 			1 << NILFS_ATIME_INO | 1 << NILFS_SKETCH_INO))
 
 #define NILFS_SYS_INO_BITS   \
 	((unsigned int)(1 << NILFS_ROOT_INO) | NILFS_MDT_INO_BITS)
 
-#define NILFS_FIRST_INO(sb) (NILFS_SB(sb)->s_nilfs->ns_first_ino)
+#define NILFS_FIRST_INO(sb) (((struct the_nilfs *)sb->s_fs_info)->ns_first_ino)
 
 #define NILFS_MDT_INODE(sb, ino) \
 	((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & (1 << (ino))))
 #define NILFS_VALID_INODE(sb, ino) \
 	((ino) >= NILFS_FIRST_INO(sb) || (NILFS_SYS_INO_BITS & (1 << (ino))))
 
 /**
  * struct nilfs_transaction_info: context information for synchronization
@@ -212,6 +211,23 @@ static inline int nilfs_init_acl(struct inode *inode, struct inode *dir)
 
 #define NILFS_ATIME_DISABLE
 
+/* Flags that should be inherited by new inodes from their parent. */
+#define NILFS_FL_INHERITED						\
+	(FS_SECRM_FL | FS_UNRM_FL | FS_COMPR_FL | FS_SYNC_FL |		\
+	 FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL |\
+	 FS_COMPRBLK_FL | FS_NOCOMP_FL | FS_NOTAIL_FL | FS_DIRSYNC_FL)
+
+/* Mask out flags that are inappropriate for the given type of inode. */
+static inline __u32 nilfs_mask_flags(umode_t mode, __u32 flags)
+{
+	if (S_ISDIR(mode))
+		return flags;
+	else if (S_ISREG(mode))
+		return flags & ~(FS_DIRSYNC_FL | FS_TOPDIR_FL);
+	else
+		return flags & (FS_NODUMP_FL | FS_NOATIME_FL);
+}
+
 /* dir.c */
 extern int nilfs_add_link(struct dentry *, struct inode *);
 extern ino_t nilfs_inode_by_name(struct inode *, const struct qstr *);
@@ -229,10 +245,13 @@ extern int nilfs_sync_file(struct file *, int);
 
 /* ioctl.c */
 long nilfs_ioctl(struct file *, unsigned int, unsigned long);
+long nilfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, struct nilfs_argv *,
 				       void **);
 
 /* inode.c */
+void nilfs_inode_add_blocks(struct inode *inode, int n);
+void nilfs_inode_sub_blocks(struct inode *inode, int n);
 extern struct inode *nilfs_new_inode(struct inode *, int);
 extern void nilfs_free_inode(struct inode *);
 extern int nilfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
@@ -266,7 +285,7 @@ extern void nilfs_destroy_inode(struct inode *);
 extern void nilfs_error(struct super_block *, const char *, const char *, ...)
 	__attribute__ ((format (printf, 3, 4)));
 extern void nilfs_warning(struct super_block *, const char *, const char *, ...)
 	__attribute__ ((format (printf, 3, 4)));
 extern struct nilfs_super_block *
 nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **);
 extern int nilfs_store_magic_and_option(struct super_block *,
@@ -275,11 +294,11 @@ extern int nilfs_check_feature_compatibility(struct super_block *,
 					struct nilfs_super_block *);
 extern void nilfs_set_log_cursor(struct nilfs_super_block *,
 				 struct the_nilfs *);
-extern struct nilfs_super_block **nilfs_prepare_super(struct nilfs_sb_info *,
-						       int flip);
-extern int nilfs_commit_super(struct nilfs_sb_info *, int);
-extern int nilfs_cleanup_super(struct nilfs_sb_info *);
-int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno, int curr_mnt,
+struct nilfs_super_block **nilfs_prepare_super(struct super_block *sb,
+					       int flip);
+int nilfs_commit_super(struct super_block *sb, int flag);
+int nilfs_cleanup_super(struct super_block *sb);
+int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt,
 			    struct nilfs_root **root);
 int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno);
 
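
Note (not part of the patch): nilfs_mask_flags() above filters inherited flags by file type — directories keep everything, regular files drop FS_DIRSYNC_FL and FS_TOPDIR_FL, and everything else (symlinks, device nodes) keeps only FS_NODUMP_FL and FS_NOATIME_FL. A userspace restatement, illustrative only, that demonstrates the effect:

#include <stdio.h>
#include <sys/stat.h>
#include <linux/fs.h>	/* FS_*_FL flag values */

/* Same logic as the kernel's inline helper, restated for demonstration. */
static unsigned int mask_flags(mode_t mode, unsigned int flags)
{
	if (S_ISDIR(mode))
		return flags;
	else if (S_ISREG(mode))
		return flags & ~(FS_DIRSYNC_FL | FS_TOPDIR_FL);
	else
		return flags & (FS_NODUMP_FL | FS_NOATIME_FL);
}

int main(void)
{
	unsigned int in = FS_DIRSYNC_FL | FS_NOATIME_FL | FS_APPEND_FL;

	printf("dir:     %#x\n", mask_flags(S_IFDIR, in)); /* keeps all three */
	printf("regular: %#x\n", mask_flags(S_IFREG, in)); /* drops DIRSYNC */
	printf("symlink: %#x\n", mask_flags(S_IFLNK, in)); /* keeps NOATIME only */
	return 0;
}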
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index a585b35fd6bc..1168059c7efd 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -493,15 +493,14 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
 }
 
 void nilfs_mapping_init(struct address_space *mapping,
-			struct backing_dev_info *bdi,
-			const struct address_space_operations *aops)
+			struct backing_dev_info *bdi)
 {
 	mapping->host = NULL;
 	mapping->flags = 0;
 	mapping_set_gfp_mask(mapping, GFP_NOFS);
 	mapping->assoc_mapping = NULL;
 	mapping->backing_dev_info = bdi;
-	mapping->a_ops = aops;
+	mapping->a_ops = &empty_aops;
 }
 
 /*
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index 2a00953ebd5f..f06b79ad7493 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -62,8 +62,7 @@ int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
 void nilfs_copy_back_pages(struct address_space *, struct address_space *);
 void nilfs_clear_dirty_pages(struct address_space *);
 void nilfs_mapping_init(struct address_space *mapping,
-			struct backing_dev_info *bdi,
-			const struct address_space_operations *aops);
+			struct backing_dev_info *bdi);
 unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned);
 unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
 					    sector_t start_blk,
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 3dfcd3b7d389..ba4a64518f38 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -425,7 +425,7 @@ void nilfs_dispose_segment_list(struct list_head *head)
 }
 
 static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
-					      struct nilfs_sb_info *sbi,
+					      struct super_block *sb,
 					      struct nilfs_recovery_info *ri)
 {
 	struct list_head *head = &ri->ri_used_segments;
@@ -501,7 +501,7 @@ static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
 }
 
 static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
-				      struct nilfs_sb_info *sbi,
+				      struct super_block *sb,
 				      struct nilfs_root *root,
 				      struct list_head *head,
 				      unsigned long *nr_salvaged_blocks)
@@ -514,7 +514,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
 	int err = 0, err2 = 0;
 
 	list_for_each_entry_safe(rb, n, head, list) {
-		inode = nilfs_iget(sbi->s_super, root, rb->ino);
+		inode = nilfs_iget(sb, root, rb->ino);
 		if (IS_ERR(inode)) {
 			err = PTR_ERR(inode);
 			inode = NULL;
@@ -572,11 +572,11 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
  * nilfs_do_roll_forward - salvage logical segments newer than the latest
  * checkpoint
  * @nilfs: nilfs object
- * @sbi: nilfs_sb_info
+ * @sb: super block instance
  * @ri: pointer to a nilfs_recovery_info
  */
 static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
-				 struct nilfs_sb_info *sbi,
+				 struct super_block *sb,
 				 struct nilfs_root *root,
 				 struct nilfs_recovery_info *ri)
 {
@@ -648,7 +648,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
 			goto failed;
 		if (flags & NILFS_SS_LOGEND) {
 			err = nilfs_recover_dsync_blocks(
-				nilfs, sbi, root, &dsync_blocks,
+				nilfs, sb, root, &dsync_blocks,
 				&nsalvaged_blocks);
 			if (unlikely(err))
 				goto failed;
@@ -681,7 +681,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
 
 	if (nsalvaged_blocks) {
 		printk(KERN_INFO "NILFS (device %s): salvaged %lu blocks\n",
-		       sbi->s_super->s_id, nsalvaged_blocks);
+		       sb->s_id, nsalvaged_blocks);
 		ri->ri_need_recovery = NILFS_RECOVERY_ROLLFORWARD_DONE;
 	}
  out:
@@ -695,7 +695,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
 	printk(KERN_ERR
 	       "NILFS (device %s): Error roll-forwarding "
 	       "(err=%d, pseg block=%llu). ",
-	       sbi->s_super->s_id, err, (unsigned long long)pseg_start);
+	       sb->s_id, err, (unsigned long long)pseg_start);
 	goto out;
 }
 
@@ -724,7 +724,7 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
 /**
  * nilfs_salvage_orphan_logs - salvage logs written after the latest checkpoint
  * @nilfs: nilfs object
- * @sbi: nilfs_sb_info
+ * @sb: super block instance
  * @ri: pointer to a nilfs_recovery_info struct to store search results.
  *
  * Return Value: On success, 0 is returned. On error, one of the following
@@ -741,7 +741,7 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
  * %-ENOMEM - Insufficient memory available.
  */
 int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs,
-			      struct nilfs_sb_info *sbi,
+			      struct super_block *sb,
 			      struct nilfs_recovery_info *ri)
 {
 	struct nilfs_root *root;
@@ -750,32 +750,32 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs,
 	if (ri->ri_lsegs_start == 0 || ri->ri_lsegs_end == 0)
 		return 0;
 
-	err = nilfs_attach_checkpoint(sbi, ri->ri_cno, true, &root);
+	err = nilfs_attach_checkpoint(sb, ri->ri_cno, true, &root);
 	if (unlikely(err)) {
 		printk(KERN_ERR
 		       "NILFS: error loading the latest checkpoint.\n");
 		return err;
 	}
 
-	err = nilfs_do_roll_forward(nilfs, sbi, root, ri);
+	err = nilfs_do_roll_forward(nilfs, sb, root, ri);
 	if (unlikely(err))
 		goto failed;
 
 	if (ri->ri_need_recovery == NILFS_RECOVERY_ROLLFORWARD_DONE) {
-		err = nilfs_prepare_segment_for_recovery(nilfs, sbi, ri);
+		err = nilfs_prepare_segment_for_recovery(nilfs, sb, ri);
 		if (unlikely(err)) {
 			printk(KERN_ERR "NILFS: Error preparing segments for "
 			       "recovery.\n");
 			goto failed;
 		}
 
-		err = nilfs_attach_segment_constructor(sbi, root);
+		err = nilfs_attach_log_writer(sb, root);
 		if (unlikely(err))
 			goto failed;
 
 		set_nilfs_discontinued(nilfs);
-		err = nilfs_construct_segment(sbi->s_super);
-		nilfs_detach_segment_constructor(sbi);
+		err = nilfs_construct_segment(sb);
+		nilfs_detach_log_writer(sb);
 
 		if (unlikely(err)) {
 			printk(KERN_ERR "NILFS: Oops! recovery failed. "
diff --git a/fs/nilfs2/sb.h b/fs/nilfs2/sb.h
deleted file mode 100644
index 7a17715f215f..000000000000
--- a/fs/nilfs2/sb.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * sb.h - NILFS on-memory super block structure.
- *
- * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- *
- * Written by Ryusuke Konishi <ryusuke@osrg.net>
- *
- */
-
-#ifndef _NILFS_SB
-#define _NILFS_SB
-
-#include <linux/types.h>
-#include <linux/fs.h>
-
-struct the_nilfs;
-struct nilfs_sc_info;
-
-/*
- * NILFS super-block data in memory
- */
-struct nilfs_sb_info {
-	/* Mount options */
-	unsigned long s_mount_opt;
-	uid_t s_resuid;
-	gid_t s_resgid;
-
-	unsigned long s_interval;	/* construction interval */
-	unsigned long s_watermark;	/* threshold of data amount
-					   for the segment construction */
-
-	/* Fundamental members */
-	struct super_block *s_super;	/* reverse pointer to super_block */
-	struct the_nilfs *s_nilfs;
-
-	/* Segment constructor */
-	struct list_head s_dirty_files;	/* dirty files list */
-	struct nilfs_sc_info *s_sc_info; /* segment constructor info */
-	spinlock_t s_inode_lock;	/* Lock for the nilfs inode.
-					   It covers s_dirty_files list */
-
-	/* Inode allocator */
-	spinlock_t s_next_gen_lock;
-	u32 s_next_generation;
-};
-
-static inline struct nilfs_sb_info *NILFS_SB(struct super_block *sb)
-{
-	return sb->s_fs_info;
-}
-
-static inline struct nilfs_sc_info *NILFS_SC(struct nilfs_sb_info *sbi)
-{
-	return sbi->s_sc_info;
-}
-
-/*
- * Bit operations for the mount option
- */
-#define nilfs_clear_opt(sbi, opt)  \
-	do { (sbi)->s_mount_opt &= ~NILFS_MOUNT_##opt; } while (0)
-#define nilfs_set_opt(sbi, opt)  \
-	do { (sbi)->s_mount_opt |= NILFS_MOUNT_##opt; } while (0)
-#define nilfs_test_opt(sbi, opt) ((sbi)->s_mount_opt & NILFS_MOUNT_##opt)
-#define nilfs_write_opt(sbi, mask, opt)					\
-	do { (sbi)->s_mount_opt =					\
-		(((sbi)->s_mount_opt & ~NILFS_MOUNT_##mask) |		\
-		 NILFS_MOUNT_##opt);					\
-	} while (0)
-
-#endif /* _NILFS_SB */
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 0f83e93935b2..2853ff20f85a 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -509,7 +509,7 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
 		 * Last BIO is always sent through the following
 		 * submission.
 		 */
-		rw |= REQ_SYNC | REQ_UNPLUG;
+		rw |= REQ_SYNC;
 		res = nilfs_segbuf_submit_bio(segbuf, &wi, rw);
 	}
 
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 2de9f636792a..afe4f2183454 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -104,8 +104,7 @@ struct nilfs_sc_operations {
 static void nilfs_segctor_start_timer(struct nilfs_sc_info *);
 static void nilfs_segctor_do_flush(struct nilfs_sc_info *, int);
 static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *);
-static void nilfs_dispose_list(struct nilfs_sb_info *, struct list_head *,
-			       int);
+static void nilfs_dispose_list(struct the_nilfs *, struct list_head *, int);
 
 #define nilfs_cnt32_gt(a, b)   \
 	(typecheck(__u32, a) && typecheck(__u32, b) && \
@@ -182,7 +181,6 @@ int nilfs_transaction_begin(struct super_block *sb,
 			    struct nilfs_transaction_info *ti,
 			    int vacancy_check)
 {
-	struct nilfs_sb_info *sbi;
 	struct the_nilfs *nilfs;
 	int ret = nilfs_prepare_segment_lock(ti);
 
@@ -193,8 +191,7 @@ int nilfs_transaction_begin(struct super_block *sb,
 
 	vfs_check_frozen(sb, SB_FREEZE_WRITE);
 
-	sbi = NILFS_SB(sb);
-	nilfs = sbi->s_nilfs;
+	nilfs = sb->s_fs_info;
 	down_read(&nilfs->ns_segctor_sem);
 	if (vacancy_check && nilfs_near_disk_full(nilfs)) {
 		up_read(&nilfs->ns_segctor_sem);
@@ -225,8 +222,7 @@ int nilfs_transaction_begin(struct super_block *sb,
 int nilfs_transaction_commit(struct super_block *sb)
 {
 	struct nilfs_transaction_info *ti = current->journal_info;
-	struct nilfs_sb_info *sbi;
-	struct nilfs_sc_info *sci;
+	struct the_nilfs *nilfs = sb->s_fs_info;
 	int err = 0;
 
 	BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
@@ -235,16 +231,15 @@ int nilfs_transaction_commit(struct super_block *sb)
 		ti->ti_count--;
 		return 0;
 	}
-	sbi = NILFS_SB(sb);
-	sci = NILFS_SC(sbi);
-	if (sci != NULL) {
+	if (nilfs->ns_writer) {
+		struct nilfs_sc_info *sci = nilfs->ns_writer;
+
 		if (ti->ti_flags & NILFS_TI_COMMIT)
 			nilfs_segctor_start_timer(sci);
-		if (atomic_read(&sbi->s_nilfs->ns_ndirtyblks) >
-		    sci->sc_watermark)
+		if (atomic_read(&nilfs->ns_ndirtyblks) > sci->sc_watermark)
 			nilfs_segctor_do_flush(sci, 0);
 	}
-	up_read(&sbi->s_nilfs->ns_segctor_sem);
+	up_read(&nilfs->ns_segctor_sem);
 	current->journal_info = ti->ti_save;
 
 	if (ti->ti_flags & NILFS_TI_SYNC)
@@ -257,13 +252,14 @@ int nilfs_transaction_commit(struct super_block *sb)
 void nilfs_transaction_abort(struct super_block *sb)
 {
 	struct nilfs_transaction_info *ti = current->journal_info;
+	struct the_nilfs *nilfs = sb->s_fs_info;
 
 	BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
 	if (ti->ti_count > 0) {
 		ti->ti_count--;
 		return;
 	}
-	up_read(&NILFS_SB(sb)->s_nilfs->ns_segctor_sem);
+	up_read(&nilfs->ns_segctor_sem);
 
 	current->journal_info = ti->ti_save;
 	if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
@@ -272,9 +268,8 @@ void nilfs_transaction_abort(struct super_block *sb)
 
 void nilfs_relax_pressure_in_lock(struct super_block *sb)
 {
-	struct nilfs_sb_info *sbi = NILFS_SB(sb);
-	struct nilfs_sc_info *sci = NILFS_SC(sbi);
-	struct the_nilfs *nilfs = sbi->s_nilfs;
+	struct the_nilfs *nilfs = sb->s_fs_info;
+	struct nilfs_sc_info *sci = nilfs->ns_writer;
 
 	if (!sci || !sci->sc_flush_request)
 		return;
@@ -294,11 +289,13 @@ void nilfs_relax_pressure_in_lock(struct super_block *sb)
 	downgrade_write(&nilfs->ns_segctor_sem);
 }
 
-static void nilfs_transaction_lock(struct nilfs_sb_info *sbi,
+static void nilfs_transaction_lock(struct super_block *sb,
 				   struct nilfs_transaction_info *ti,
 				   int gcflag)
 {
 	struct nilfs_transaction_info *cur_ti = current->journal_info;
+	struct the_nilfs *nilfs = sb->s_fs_info;
+	struct nilfs_sc_info *sci = nilfs->ns_writer;
 
 	WARN_ON(cur_ti);
 	ti->ti_flags = NILFS_TI_WRITER;
@@ -309,30 +306,31 @@ static void nilfs_transaction_lock(struct nilfs_sb_info *sbi,
 	current->journal_info = ti;
 
 	for (;;) {
-		down_write(&sbi->s_nilfs->ns_segctor_sem);
-		if (!test_bit(NILFS_SC_PRIOR_FLUSH, &NILFS_SC(sbi)->sc_flags))
+		down_write(&nilfs->ns_segctor_sem);
+		if (!test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags))
 			break;
 
-		nilfs_segctor_do_immediate_flush(NILFS_SC(sbi));
+		nilfs_segctor_do_immediate_flush(sci);
 
-		up_write(&sbi->s_nilfs->ns_segctor_sem);
+		up_write(&nilfs->ns_segctor_sem);
 		yield();
 	}
 	if (gcflag)
 		ti->ti_flags |= NILFS_TI_GC;
 }
 
-static void nilfs_transaction_unlock(struct nilfs_sb_info *sbi)
+static void nilfs_transaction_unlock(struct super_block *sb)
 {
 	struct nilfs_transaction_info *ti = current->journal_info;
+	struct the_nilfs *nilfs = sb->s_fs_info;
 
 	BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
 	BUG_ON(ti->ti_count > 0);
 
-	up_write(&sbi->s_nilfs->ns_segctor_sem);
+	up_write(&nilfs->ns_segctor_sem);
 	current->journal_info = ti->ti_save;
 	if (!list_empty(&ti->ti_garbage))
-		nilfs_dispose_list(sbi, &ti->ti_garbage, 0);
+		nilfs_dispose_list(nilfs, &ti->ti_garbage, 0);
 }
 
 static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci,
@@ -714,7 +712,7 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
 	}
 }
 
-static void nilfs_dispose_list(struct nilfs_sb_info *sbi,
+static void nilfs_dispose_list(struct the_nilfs *nilfs,
 			       struct list_head *head, int force)
 {
 	struct nilfs_inode_info *ii, *n;
@@ -722,7 +720,7 @@ static void nilfs_dispose_list(struct nilfs_sb_info *sbi,
 	unsigned nv = 0;
 
 	while (!list_empty(head)) {
-		spin_lock(&sbi->s_inode_lock);
+		spin_lock(&nilfs->ns_inode_lock);
 		list_for_each_entry_safe(ii, n, head, i_dirty) {
 			list_del_init(&ii->i_dirty);
 			if (force) {
@@ -733,14 +731,14 @@ static void nilfs_dispose_list(struct nilfs_sb_info *sbi,
 			} else if (test_bit(NILFS_I_DIRTY, &ii->i_state)) {
 				set_bit(NILFS_I_QUEUED, &ii->i_state);
 				list_add_tail(&ii->i_dirty,
-					      &sbi->s_dirty_files);
+					      &nilfs->ns_dirty_files);
 				continue;
 			}
 			ivec[nv++] = ii;
 			if (nv == SC_N_INODEVEC)
 				break;
 		}
-		spin_unlock(&sbi->s_inode_lock);
+		spin_unlock(&nilfs->ns_inode_lock);
 
 		for (pii = ivec; nv > 0; pii++, nv--)
 			iput(&(*pii)->vfs_inode);
@@ -773,24 +771,23 @@ static int nilfs_segctor_clean(struct nilfs_sc_info *sci)
 
 static int nilfs_segctor_confirm(struct nilfs_sc_info *sci)
 {
-	struct nilfs_sb_info *sbi = sci->sc_sbi;
+	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
 	int ret = 0;
 
-	if (nilfs_test_metadata_dirty(sbi->s_nilfs, sci->sc_root))
+	if (nilfs_test_metadata_dirty(nilfs, sci->sc_root))
 		set_bit(NILFS_SC_DIRTY, &sci->sc_flags);
 
-	spin_lock(&sbi->s_inode_lock);
-	if (list_empty(&sbi->s_dirty_files) && nilfs_segctor_clean(sci))
+	spin_lock(&nilfs->ns_inode_lock);
+	if (list_empty(&nilfs->ns_dirty_files) && nilfs_segctor_clean(sci))
 		ret++;
 
-	spin_unlock(&sbi->s_inode_lock);
+	spin_unlock(&nilfs->ns_inode_lock);
 	return ret;
 }
 
 static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci)
 {
-	struct nilfs_sb_info *sbi = sci->sc_sbi;
-	struct the_nilfs *nilfs = sbi->s_nilfs;
+	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
 
 	nilfs_mdt_clear_dirty(sci->sc_root->ifile);
 	nilfs_mdt_clear_dirty(nilfs->ns_cpfile);
@@ -800,7 +797,7 @@ static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci)
 
 static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
 {
-	struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs;
+	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
 	struct buffer_head *bh_cp;
 	struct nilfs_checkpoint *raw_cp;
 	int err;
@@ -824,8 +821,7 @@ static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
 
 static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci)
 {
-	struct nilfs_sb_info *sbi = sci->sc_sbi;
-	struct the_nilfs *nilfs = sbi->s_nilfs;
+	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
 	struct buffer_head *bh_cp;
 	struct nilfs_checkpoint *raw_cp;
 	int err;
@@ -1049,8 +1045,7 @@ static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci,
 
 static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
 {
-	struct nilfs_sb_info *sbi = sci->sc_sbi;
-	struct the_nilfs *nilfs = sbi->s_nilfs;
+	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
 	struct list_head *head;
 	struct nilfs_inode_info *ii;
 	size_t ndone;
@@ -1859,7 +1854,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
 {
 	struct nilfs_segment_buffer *segbuf;
 	struct page *bd_page = NULL, *fs_page = NULL;
-	struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs;
+	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
 	int update_sr = false;
 
 	list_for_each_entry(segbuf, &sci->sc_write_logs, sb_list) {
@@ -1963,30 +1958,30 @@ static int nilfs_segctor_wait(struct nilfs_sc_info *sci)
 	return ret;
 }
 
-static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci,
-					struct nilfs_sb_info *sbi)
+static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
+					     struct the_nilfs *nilfs)
 {
 	struct nilfs_inode_info *ii, *n;
 	struct inode *ifile = sci->sc_root->ifile;
 
-	spin_lock(&sbi->s_inode_lock);
+	spin_lock(&nilfs->ns_inode_lock);
  retry:
-	list_for_each_entry_safe(ii, n, &sbi->s_dirty_files, i_dirty) {
+	list_for_each_entry_safe(ii, n, &nilfs->ns_dirty_files, i_dirty) {
 		if (!ii->i_bh) {
 			struct buffer_head *ibh;
 			int err;
 
-			spin_unlock(&sbi->s_inode_lock);
+			spin_unlock(&nilfs->ns_inode_lock);
 			err = nilfs_ifile_get_inode_block(
 				ifile, ii->vfs_inode.i_ino, &ibh);
 			if (unlikely(err)) {
-				nilfs_warning(sbi->s_super, __func__,
+				nilfs_warning(sci->sc_super, __func__,
 					      "failed to get inode block.\n");
 				return err;
 			}
 			nilfs_mdt_mark_buffer_dirty(ibh);
 			nilfs_mdt_mark_dirty(ifile);
-			spin_lock(&sbi->s_inode_lock);
+			spin_lock(&nilfs->ns_inode_lock);
 			if (likely(!ii->i_bh))
 				ii->i_bh = ibh;
 			else
@@ -1999,18 +1994,18 @@ static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci,
 		list_del(&ii->i_dirty);
 		list_add_tail(&ii->i_dirty, &sci->sc_dirty_files);
 	}
-	spin_unlock(&sbi->s_inode_lock);
+	spin_unlock(&nilfs->ns_inode_lock);
 
 	return 0;
 }
 
-static void nilfs_segctor_check_out_files(struct nilfs_sc_info *sci,
-					  struct nilfs_sb_info *sbi)
+static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
+					     struct the_nilfs *nilfs)
 {
 	struct nilfs_transaction_info *ti = current->journal_info;
 	struct nilfs_inode_info *ii, *n;
 
-	spin_lock(&sbi->s_inode_lock);
+	spin_lock(&nilfs->ns_inode_lock);
 	list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) {
 		if (!test_and_clear_bit(NILFS_I_UPDATED, &ii->i_state) ||
 		    test_bit(NILFS_I_DIRTY, &ii->i_state))
@@ -2022,7 +2017,7 @@ static void nilfs_segctor_check_out_files(struct nilfs_sc_info *sci,
 		list_del(&ii->i_dirty);
 		list_add_tail(&ii->i_dirty, &ti->ti_garbage);
 	}
-	spin_unlock(&sbi->s_inode_lock);
+	spin_unlock(&nilfs->ns_inode_lock);
 }
 
 /*
@@ -2030,15 +2025,14 @@ static void nilfs_segctor_check_out_files(struct nilfs_sc_info *sci,
  */
 static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
 {
-	struct nilfs_sb_info *sbi = sci->sc_sbi;
-	struct the_nilfs *nilfs = sbi->s_nilfs;
+	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
 	struct page *failed_page;
 	int err;
 
 	sci->sc_stage.scnt = NILFS_ST_INIT;
 	sci->sc_cno = nilfs->ns_cno;
 
-	err = nilfs_segctor_check_in_files(sci, sbi);
+	err = nilfs_segctor_collect_dirty_files(sci, nilfs);
 	if (unlikely(err))
 		goto out;
 
@@ -2116,7 +2110,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
 	} while (sci->sc_stage.scnt != NILFS_ST_DONE);
 
  out:
-	nilfs_segctor_check_out_files(sci, sbi);
+	nilfs_segctor_drop_written_files(sci, nilfs);
 	return err;
 
  failed_to_write:
@@ -2169,8 +2163,8 @@ static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, int bn)
  */
 void nilfs_flush_segment(struct super_block *sb, ino_t ino)
 {
-	struct nilfs_sb_info *sbi = NILFS_SB(sb);
-	struct nilfs_sc_info *sci = NILFS_SC(sbi);
+	struct the_nilfs *nilfs = sb->s_fs_info;
+	struct nilfs_sc_info *sci = nilfs->ns_writer;
 
 	if (!sci || nilfs_doing_construction())
 		return;
@@ -2259,8 +2253,8 @@ static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err)
  */
 int nilfs_construct_segment(struct super_block *sb)
 {
-	struct nilfs_sb_info *sbi = NILFS_SB(sb);
-	struct nilfs_sc_info *sci = NILFS_SC(sbi);
+	struct the_nilfs *nilfs = sb->s_fs_info;
+	struct nilfs_sc_info *sci = nilfs->ns_writer;
 	struct nilfs_transaction_info *ti;
 	int err;
 
@@ -2297,8 +2291,8 @@ int nilfs_construct_segment(struct super_block *sb)
 int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode,
 				  loff_t start, loff_t end)
 {
-	struct nilfs_sb_info *sbi = NILFS_SB(sb);
-	struct nilfs_sc_info *sci = NILFS_SC(sbi);
+	struct the_nilfs *nilfs = sb->s_fs_info;
+	struct nilfs_sc_info *sci = nilfs->ns_writer;
 	struct nilfs_inode_info *ii;
 	struct nilfs_transaction_info ti;
 	int err = 0;
@@ -2306,33 +2300,33 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode,
2306 if (!sci) 2300 if (!sci)
2307 return -EROFS; 2301 return -EROFS;
2308 2302
2309 nilfs_transaction_lock(sbi, &ti, 0); 2303 nilfs_transaction_lock(sb, &ti, 0);
2310 2304
2311 ii = NILFS_I(inode); 2305 ii = NILFS_I(inode);
2312 if (test_bit(NILFS_I_INODE_DIRTY, &ii->i_state) || 2306 if (test_bit(NILFS_I_INODE_DIRTY, &ii->i_state) ||
2313 nilfs_test_opt(sbi, STRICT_ORDER) || 2307 nilfs_test_opt(nilfs, STRICT_ORDER) ||
2314 test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) || 2308 test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) ||
2315 nilfs_discontinued(sbi->s_nilfs)) { 2309 nilfs_discontinued(nilfs)) {
2316 nilfs_transaction_unlock(sbi); 2310 nilfs_transaction_unlock(sb);
2317 err = nilfs_segctor_sync(sci); 2311 err = nilfs_segctor_sync(sci);
2318 return err; 2312 return err;
2319 } 2313 }
2320 2314
2321 spin_lock(&sbi->s_inode_lock); 2315 spin_lock(&nilfs->ns_inode_lock);
2322 if (!test_bit(NILFS_I_QUEUED, &ii->i_state) && 2316 if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
2323 !test_bit(NILFS_I_BUSY, &ii->i_state)) { 2317 !test_bit(NILFS_I_BUSY, &ii->i_state)) {
2324 spin_unlock(&sbi->s_inode_lock); 2318 spin_unlock(&nilfs->ns_inode_lock);
2325 nilfs_transaction_unlock(sbi); 2319 nilfs_transaction_unlock(sb);
2326 return 0; 2320 return 0;
2327 } 2321 }
2328 spin_unlock(&sbi->s_inode_lock); 2322 spin_unlock(&nilfs->ns_inode_lock);
2329 sci->sc_dsync_inode = ii; 2323 sci->sc_dsync_inode = ii;
2330 sci->sc_dsync_start = start; 2324 sci->sc_dsync_start = start;
2331 sci->sc_dsync_end = end; 2325 sci->sc_dsync_end = end;
2332 2326
2333 err = nilfs_segctor_do_construct(sci, SC_LSEG_DSYNC); 2327 err = nilfs_segctor_do_construct(sci, SC_LSEG_DSYNC);
2334 2328
2335 nilfs_transaction_unlock(sbi); 2329 nilfs_transaction_unlock(sb);
2336 return err; 2330 return err;
2337} 2331}
2338 2332
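nilfs_construct_dsync_segment() keeps its calling convention while the lock argument changes from sbi to sb: every construction request is bracketed by nilfs_transaction_lock()/nilfs_transaction_unlock(), and every early return has to pass back through the unlock. A compilable sketch of that bracket discipline, with the kernel lock modeled by a plain mutex; the nilfs_* names keep the kernel spelling but the bodies are toy stand-ins:

#include <pthread.h>

static pthread_mutex_t trans_lock = PTHREAD_MUTEX_INITIALIZER;

struct nilfs_transaction_info { int ti_gc; };

static void nilfs_transaction_lock(struct nilfs_transaction_info *ti, int gcflag)
{
        ti->ti_gc = gcflag;
        pthread_mutex_lock(&trans_lock);  /* the real lock also stacks ti on the task */
}

static void nilfs_transaction_unlock(void)
{
        pthread_mutex_unlock(&trans_lock);
}

/* Every exit path, including the "nothing dirty" fast path, unlocks. */
static int construct_dsync(int inode_is_queued)
{
        struct nilfs_transaction_info ti;

        nilfs_transaction_lock(&ti, 0);
        if (!inode_is_queued) {
                nilfs_transaction_unlock();
                return 0;                 /* mirrors the early return above */
        }
        /* nilfs_segctor_do_construct(sci, SC_LSEG_DSYNC) would run here */
        nilfs_transaction_unlock();
        return 0;
}

int main(void) { return construct_dsync(1); }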
@@ -2388,8 +2382,7 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err)
2388 */ 2382 */
2389static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode) 2383static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode)
2390{ 2384{
2391 struct nilfs_sb_info *sbi = sci->sc_sbi; 2385 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2392 struct the_nilfs *nilfs = sbi->s_nilfs;
2393 struct nilfs_super_block **sbp; 2386 struct nilfs_super_block **sbp;
2394 int err = 0; 2387 int err = 0;
2395 2388
@@ -2407,11 +2400,12 @@ static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode)
2407 nilfs_discontinued(nilfs)) { 2400 nilfs_discontinued(nilfs)) {
2408 down_write(&nilfs->ns_sem); 2401 down_write(&nilfs->ns_sem);
2409 err = -EIO; 2402 err = -EIO;
2410 sbp = nilfs_prepare_super(sbi, 2403 sbp = nilfs_prepare_super(sci->sc_super,
2411 nilfs_sb_will_flip(nilfs)); 2404 nilfs_sb_will_flip(nilfs));
2412 if (likely(sbp)) { 2405 if (likely(sbp)) {
2413 nilfs_set_log_cursor(sbp[0], nilfs); 2406 nilfs_set_log_cursor(sbp[0], nilfs);
2414 err = nilfs_commit_super(sbi, NILFS_SB_COMMIT); 2407 err = nilfs_commit_super(sci->sc_super,
2408 NILFS_SB_COMMIT);
2415 } 2409 }
2416 up_write(&nilfs->ns_sem); 2410 up_write(&nilfs->ns_sem);
2417 } 2411 }
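The hunk above repeats an idiom this patch touches throughout super.c as well: take ns_sem for writing, obtain the super block pair with nilfs_prepare_super(), stamp the log cursor, then write it back with nilfs_commit_super(). A condensed user-space model of that shape, assuming a pthread rwlock in place of the kernel rw_semaphore and stubbed prepare/commit bodies:

#include <pthread.h>
#include <stdio.h>

struct nilfs_super_block { unsigned long long s_last_cno; };

struct the_nilfs {
        pthread_rwlock_t ns_sem;               /* models the kernel rw_semaphore */
        struct nilfs_super_block sb_pair[2];
        struct nilfs_super_block *ns_sbp[2];
};

/* Toy stand-ins: the real nilfs_prepare_super() can return NULL on a broken pair. */
static struct nilfs_super_block **prepare_super(struct the_nilfs *n) { return n->ns_sbp; }
static int commit_super(struct the_nilfs *n) { (void)n; return 0; }

static int update_super(struct the_nilfs *nilfs, unsigned long long cno)
{
        struct nilfs_super_block **sbp;
        int err = -5;                          /* -EIO in the kernel */

        pthread_rwlock_wrlock(&nilfs->ns_sem); /* down_write(&nilfs->ns_sem) */
        sbp = prepare_super(nilfs);
        if (sbp) {
                sbp[0]->s_last_cno = cno;      /* nilfs_set_log_cursor() */
                err = commit_super(nilfs);
        }
        pthread_rwlock_unlock(&nilfs->ns_sem); /* up_write(&nilfs->ns_sem) */
        return err;
}

int main(void)
{
        struct the_nilfs n = { .sb_pair = { {0}, {0} } };

        pthread_rwlock_init(&n.ns_sem, NULL);
        n.ns_sbp[0] = &n.sb_pair[0];
        n.ns_sbp[1] = &n.sb_pair[1];
        printf("err=%d cno=%llu\n", update_super(&n, 7), n.ns_sbp[0]->s_last_cno);
        pthread_rwlock_destroy(&n.ns_sem);
        return 0;
}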
@@ -2443,16 +2437,15 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head)
2443int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, 2437int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv,
2444 void **kbufs) 2438 void **kbufs)
2445{ 2439{
2446 struct nilfs_sb_info *sbi = NILFS_SB(sb); 2440 struct the_nilfs *nilfs = sb->s_fs_info;
2447 struct nilfs_sc_info *sci = NILFS_SC(sbi); 2441 struct nilfs_sc_info *sci = nilfs->ns_writer;
2448 struct the_nilfs *nilfs = sbi->s_nilfs;
2449 struct nilfs_transaction_info ti; 2442 struct nilfs_transaction_info ti;
2450 int err; 2443 int err;
2451 2444
2452 if (unlikely(!sci)) 2445 if (unlikely(!sci))
2453 return -EROFS; 2446 return -EROFS;
2454 2447
2455 nilfs_transaction_lock(sbi, &ti, 1); 2448 nilfs_transaction_lock(sb, &ti, 1);
2456 2449
2457 err = nilfs_mdt_save_to_shadow_map(nilfs->ns_dat); 2450 err = nilfs_mdt_save_to_shadow_map(nilfs->ns_dat);
2458 if (unlikely(err)) 2451 if (unlikely(err))
@@ -2480,14 +2473,14 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv,
2480 set_current_state(TASK_INTERRUPTIBLE); 2473 set_current_state(TASK_INTERRUPTIBLE);
2481 schedule_timeout(sci->sc_interval); 2474 schedule_timeout(sci->sc_interval);
2482 } 2475 }
2483 if (nilfs_test_opt(sbi, DISCARD)) { 2476 if (nilfs_test_opt(nilfs, DISCARD)) {
2484 int ret = nilfs_discard_segments(nilfs, sci->sc_freesegs, 2477 int ret = nilfs_discard_segments(nilfs, sci->sc_freesegs,
2485 sci->sc_nfreesegs); 2478 sci->sc_nfreesegs);
2486 if (ret) { 2479 if (ret) {
2487 printk(KERN_WARNING 2480 printk(KERN_WARNING
2488 "NILFS warning: error %d on discard request, " 2481 "NILFS warning: error %d on discard request, "
2489 "turning discards off for the device\n", ret); 2482 "turning discards off for the device\n", ret);
2490 nilfs_clear_opt(sbi, DISCARD); 2483 nilfs_clear_opt(nilfs, DISCARD);
2491 } 2484 }
2492 } 2485 }
2493 2486
@@ -2495,16 +2488,15 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv,
2495 sci->sc_freesegs = NULL; 2488 sci->sc_freesegs = NULL;
2496 sci->sc_nfreesegs = 0; 2489 sci->sc_nfreesegs = 0;
2497 nilfs_mdt_clear_shadow_map(nilfs->ns_dat); 2490 nilfs_mdt_clear_shadow_map(nilfs->ns_dat);
2498 nilfs_transaction_unlock(sbi); 2491 nilfs_transaction_unlock(sb);
2499 return err; 2492 return err;
2500} 2493}
2501 2494
2502static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode) 2495static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode)
2503{ 2496{
2504 struct nilfs_sb_info *sbi = sci->sc_sbi;
2505 struct nilfs_transaction_info ti; 2497 struct nilfs_transaction_info ti;
2506 2498
2507 nilfs_transaction_lock(sbi, &ti, 0); 2499 nilfs_transaction_lock(sci->sc_super, &ti, 0);
2508 nilfs_segctor_construct(sci, mode); 2500 nilfs_segctor_construct(sci, mode);
2509 2501
2510 /* 2502 /*
@@ -2515,7 +2507,7 @@ static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode)
2515 if (test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags)) 2507 if (test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags))
2516 nilfs_segctor_start_timer(sci); 2508 nilfs_segctor_start_timer(sci);
2517 2509
2518 nilfs_transaction_unlock(sbi); 2510 nilfs_transaction_unlock(sci->sc_super);
2519} 2511}
2520 2512
2521static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci) 2513static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci)
@@ -2561,7 +2553,7 @@ static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci)
2561static int nilfs_segctor_thread(void *arg) 2553static int nilfs_segctor_thread(void *arg)
2562{ 2554{
2563 struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg; 2555 struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg;
2564 struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs; 2556 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2565 int timeout = 0; 2557 int timeout = 0;
2566 2558
2567 sci->sc_timer.data = (unsigned long)current; 2559 sci->sc_timer.data = (unsigned long)current;
@@ -2672,17 +2664,17 @@ static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci)
2672/* 2664/*
2673 * Setup & clean-up functions 2665 * Setup & clean-up functions
2674 */ 2666 */
2675static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi, 2667static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
2676 struct nilfs_root *root) 2668 struct nilfs_root *root)
2677{ 2669{
2670 struct the_nilfs *nilfs = sb->s_fs_info;
2678 struct nilfs_sc_info *sci; 2671 struct nilfs_sc_info *sci;
2679 2672
2680 sci = kzalloc(sizeof(*sci), GFP_KERNEL); 2673 sci = kzalloc(sizeof(*sci), GFP_KERNEL);
2681 if (!sci) 2674 if (!sci)
2682 return NULL; 2675 return NULL;
2683 2676
2684 sci->sc_sbi = sbi; 2677 sci->sc_super = sb;
2685 sci->sc_super = sbi->s_super;
2686 2678
2687 nilfs_get_root(root); 2679 nilfs_get_root(root);
2688 sci->sc_root = root; 2680 sci->sc_root = root;
@@ -2702,10 +2694,10 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi,
2702 sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ; 2694 sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ;
2703 sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK; 2695 sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK;
2704 2696
2705 if (sbi->s_interval) 2697 if (nilfs->ns_interval)
2706 sci->sc_interval = sbi->s_interval; 2698 sci->sc_interval = nilfs->ns_interval;
2707 if (sbi->s_watermark) 2699 if (nilfs->ns_watermark)
2708 sci->sc_watermark = sbi->s_watermark; 2700 sci->sc_watermark = nilfs->ns_watermark;
2709 return sci; 2701 return sci;
2710} 2702}
2711 2703
@@ -2716,12 +2708,11 @@ static void nilfs_segctor_write_out(struct nilfs_sc_info *sci)
2716 /* The segctord thread was stopped and its timer was removed. 2708 /* The segctord thread was stopped and its timer was removed.
2717 But some tasks remain. */ 2709 But some tasks remain. */
2718 do { 2710 do {
2719 struct nilfs_sb_info *sbi = sci->sc_sbi;
2720 struct nilfs_transaction_info ti; 2711 struct nilfs_transaction_info ti;
2721 2712
2722 nilfs_transaction_lock(sbi, &ti, 0); 2713 nilfs_transaction_lock(sci->sc_super, &ti, 0);
2723 ret = nilfs_segctor_construct(sci, SC_LSEG_SR); 2714 ret = nilfs_segctor_construct(sci, SC_LSEG_SR);
2724 nilfs_transaction_unlock(sbi); 2715 nilfs_transaction_unlock(sci->sc_super);
2725 2716
2726 } while (ret && retrycount-- > 0); 2717 } while (ret && retrycount-- > 0);
2727} 2718}
@@ -2736,10 +2727,10 @@ static void nilfs_segctor_write_out(struct nilfs_sc_info *sci)
2736 */ 2727 */
2737static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) 2728static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
2738{ 2729{
2739 struct nilfs_sb_info *sbi = sci->sc_sbi; 2730 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2740 int flag; 2731 int flag;
2741 2732
2742 up_write(&sbi->s_nilfs->ns_segctor_sem); 2733 up_write(&nilfs->ns_segctor_sem);
2743 2734
2744 spin_lock(&sci->sc_state_lock); 2735 spin_lock(&sci->sc_state_lock);
2745 nilfs_segctor_kill_thread(sci); 2736 nilfs_segctor_kill_thread(sci);
@@ -2753,9 +2744,9 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
2753 WARN_ON(!list_empty(&sci->sc_copied_buffers)); 2744 WARN_ON(!list_empty(&sci->sc_copied_buffers));
2754 2745
2755 if (!list_empty(&sci->sc_dirty_files)) { 2746 if (!list_empty(&sci->sc_dirty_files)) {
2756 nilfs_warning(sbi->s_super, __func__, 2747 nilfs_warning(sci->sc_super, __func__,
2757 "dirty file(s) after the final construction\n"); 2748 "dirty file(s) after the final construction\n");
2758 nilfs_dispose_list(sbi, &sci->sc_dirty_files, 1); 2749 nilfs_dispose_list(nilfs, &sci->sc_dirty_files, 1);
2759 } 2750 }
2760 2751
2761 WARN_ON(!list_empty(&sci->sc_segbufs)); 2752 WARN_ON(!list_empty(&sci->sc_segbufs));
@@ -2763,79 +2754,78 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
2763 2754
2764 nilfs_put_root(sci->sc_root); 2755 nilfs_put_root(sci->sc_root);
2765 2756
2766 down_write(&sbi->s_nilfs->ns_segctor_sem); 2757 down_write(&nilfs->ns_segctor_sem);
2767 2758
2768 del_timer_sync(&sci->sc_timer); 2759 del_timer_sync(&sci->sc_timer);
2769 kfree(sci); 2760 kfree(sci);
2770} 2761}
2771 2762
2772/** 2763/**
2773 * nilfs_attach_segment_constructor - attach a segment constructor 2764 * nilfs_attach_log_writer - attach log writer
2774 * @sbi: nilfs_sb_info 2765 * @sb: super block instance
2775 * @root: root object of the current filesystem tree 2766 * @root: root object of the current filesystem tree
2776 * 2767 *
2777 * nilfs_attach_segment_constructor() allocates a struct nilfs_sc_info, 2768 * This allocates a log writer object, initializes it, and starts the
2778 * initializes it, and starts the segment constructor. 2769 * log writer.
2779 * 2770 *
2780 * Return Value: On success, 0 is returned. On error, one of the following 2771 * Return Value: On success, 0 is returned. On error, one of the following
2781 * negative error codes is returned. 2772 * negative error codes is returned.
2782 * 2773 *
2783 * %-ENOMEM - Insufficient memory available. 2774 * %-ENOMEM - Insufficient memory available.
2784 */ 2775 */
2785int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi, 2776int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root)
2786 struct nilfs_root *root)
2787{ 2777{
2778 struct the_nilfs *nilfs = sb->s_fs_info;
2788 int err; 2779 int err;
2789 2780
2790 if (NILFS_SC(sbi)) { 2781 if (nilfs->ns_writer) {
2791 /* 2782 /*
2792 * This happens if the filesystem was remounted 2783 * This happens if the filesystem was remounted
2793 * read/write after nilfs_error degenerated it into a 2784 * read/write after nilfs_error degenerated it into a
2794 * read-only mount. 2785 * read-only mount.
2795 */ 2786 */
2796 nilfs_detach_segment_constructor(sbi); 2787 nilfs_detach_log_writer(sb);
2797 } 2788 }
2798 2789
2799 sbi->s_sc_info = nilfs_segctor_new(sbi, root); 2790 nilfs->ns_writer = nilfs_segctor_new(sb, root);
2800 if (!sbi->s_sc_info) 2791 if (!nilfs->ns_writer)
2801 return -ENOMEM; 2792 return -ENOMEM;
2802 2793
2803 err = nilfs_segctor_start_thread(NILFS_SC(sbi)); 2794 err = nilfs_segctor_start_thread(nilfs->ns_writer);
2804 if (err) { 2795 if (err) {
2805 kfree(sbi->s_sc_info); 2796 kfree(nilfs->ns_writer);
2806 sbi->s_sc_info = NULL; 2797 nilfs->ns_writer = NULL;
2807 } 2798 }
2808 return err; 2799 return err;
2809} 2800}
2810 2801
2811/** 2802/**
2812 * nilfs_detach_segment_constructor - destroy the segment constructor 2803 * nilfs_detach_log_writer - destroy log writer
2813 * @sbi: nilfs_sb_info 2804 * @sb: super block instance
2814 * 2805 *
2815 * nilfs_detach_segment_constructor() kills the segment constructor daemon, 2806 * This kills the log writer daemon, frees the log writer object, and
2816 * frees the struct nilfs_sc_info, and destroy the dirty file list. 2807 * destroys the list of dirty files.
2817 */ 2808 */
2818void nilfs_detach_segment_constructor(struct nilfs_sb_info *sbi) 2809void nilfs_detach_log_writer(struct super_block *sb)
2819{ 2810{
2820 struct the_nilfs *nilfs = sbi->s_nilfs; 2811 struct the_nilfs *nilfs = sb->s_fs_info;
2821 LIST_HEAD(garbage_list); 2812 LIST_HEAD(garbage_list);
2822 2813
2823 down_write(&nilfs->ns_segctor_sem); 2814 down_write(&nilfs->ns_segctor_sem);
2824 if (NILFS_SC(sbi)) { 2815 if (nilfs->ns_writer) {
2825 nilfs_segctor_destroy(NILFS_SC(sbi)); 2816 nilfs_segctor_destroy(nilfs->ns_writer);
2826 sbi->s_sc_info = NULL; 2817 nilfs->ns_writer = NULL;
2827 } 2818 }
2828 2819
2829 /* Forcibly free the list of dirty files */ 2820 /* Forcibly free the list of dirty files */
2830 spin_lock(&sbi->s_inode_lock); 2821 spin_lock(&nilfs->ns_inode_lock);
2831 if (!list_empty(&sbi->s_dirty_files)) { 2822 if (!list_empty(&nilfs->ns_dirty_files)) {
2832 list_splice_init(&sbi->s_dirty_files, &garbage_list); 2823 list_splice_init(&nilfs->ns_dirty_files, &garbage_list);
2833 nilfs_warning(sbi->s_super, __func__, 2824 nilfs_warning(sb, __func__,
2834 "Non empty dirty list after the last " 2825 "Hit dirty file after stopped log writer\n");
2835 "segment construction\n"); 2826 }
2836 } 2827 spin_unlock(&nilfs->ns_inode_lock);
2837 spin_unlock(&sbi->s_inode_lock);
2838 up_write(&nilfs->ns_segctor_sem); 2828 up_write(&nilfs->ns_segctor_sem);
2839 2829
2840 nilfs_dispose_list(sbi, &garbage_list, 1); 2830 nilfs_dispose_list(nilfs, &garbage_list, 1);
2841} 2831}
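Taken together, nilfs_attach_log_writer() and nilfs_detach_log_writer() tie the log writer's lifetime to the super block; note that the attach path defensively detaches first, since, as the comment above says, a read/write remount after nilfs_error() degraded the mount to read-only can leave a stale writer in ns_writer. A simplified model of that ownership protocol; the ns_writer slot mirrors the new field, everything else is scaffolding invented for the sketch:

#include <stdlib.h>

struct nilfs_sc_info { int started; };
struct the_nilfs { struct nilfs_sc_info *ns_writer; };

static int attach_log_writer(struct the_nilfs *nilfs)
{
        if (nilfs->ns_writer) {          /* stale writer from an ro->rw remount */
                free(nilfs->ns_writer);
                nilfs->ns_writer = NULL;
        }
        nilfs->ns_writer = calloc(1, sizeof(*nilfs->ns_writer));
        if (!nilfs->ns_writer)
                return -12;              /* -ENOMEM */
        /* a start_thread() failure would free and NULL the slot again */
        return 0;
}

static void detach_log_writer(struct the_nilfs *nilfs)
{
        free(nilfs->ns_writer);          /* nilfs_segctor_destroy() in the kernel */
        nilfs->ns_writer = NULL;
}

int main(void)
{
        struct the_nilfs n = { 0 };

        if (attach_log_writer(&n) == 0)
                detach_log_writer(&n);
        return 0;
}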
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index cd8056e7cbed..6c02a86745fb 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -27,7 +27,7 @@
27#include <linux/fs.h> 27#include <linux/fs.h>
28#include <linux/buffer_head.h> 28#include <linux/buffer_head.h>
29#include <linux/nilfs2_fs.h> 29#include <linux/nilfs2_fs.h>
30#include "sb.h" 30#include "nilfs.h"
31 31
32struct nilfs_root; 32struct nilfs_root;
33 33
@@ -88,7 +88,6 @@ struct nilfs_segsum_pointer {
88/** 88/**
89 * struct nilfs_sc_info - Segment constructor information 89 * struct nilfs_sc_info - Segment constructor information
90 * @sc_super: Back pointer to super_block struct 90 * @sc_super: Back pointer to super_block struct
91 * @sc_sbi: Back pointer to nilfs_sb_info struct
92 * @sc_root: root object of the current filesystem tree 91 * @sc_root: root object of the current filesystem tree
93 * @sc_nblk_inc: Block count of current generation 92 * @sc_nblk_inc: Block count of current generation
94 * @sc_dirty_files: List of files to be written 93 * @sc_dirty_files: List of files to be written
@@ -131,7 +130,6 @@ struct nilfs_segsum_pointer {
131 */ 130 */
132struct nilfs_sc_info { 131struct nilfs_sc_info {
133 struct super_block *sc_super; 132 struct super_block *sc_super;
134 struct nilfs_sb_info *sc_sbi;
135 struct nilfs_root *sc_root; 133 struct nilfs_root *sc_root;
136 134
137 unsigned long sc_nblk_inc; 135 unsigned long sc_nblk_inc;
@@ -235,18 +233,16 @@ extern void nilfs_flush_segment(struct super_block *, ino_t);
235extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *, 233extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *,
236 void **); 234 void **);
237 235
238int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi, 236int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root);
239 struct nilfs_root *root); 237void nilfs_detach_log_writer(struct super_block *sb);
240extern void nilfs_detach_segment_constructor(struct nilfs_sb_info *);
241 238
242/* recovery.c */ 239/* recovery.c */
243extern int nilfs_read_super_root_block(struct the_nilfs *, sector_t, 240extern int nilfs_read_super_root_block(struct the_nilfs *, sector_t,
244 struct buffer_head **, int); 241 struct buffer_head **, int);
245extern int nilfs_search_super_root(struct the_nilfs *, 242extern int nilfs_search_super_root(struct the_nilfs *,
246 struct nilfs_recovery_info *); 243 struct nilfs_recovery_info *);
247extern int nilfs_salvage_orphan_logs(struct the_nilfs *, 244int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, struct super_block *sb,
248 struct nilfs_sb_info *, 245 struct nilfs_recovery_info *ri);
249 struct nilfs_recovery_info *);
250extern void nilfs_dispose_segment_list(struct list_head *); 246extern void nilfs_dispose_segment_list(struct list_head *);
251 247
252#endif /* _NILFS_SEGMENT_H */ 248#endif /* _NILFS_SEGMENT_H */
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 1673b3d99842..062cca065195 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -43,7 +43,6 @@
43#include <linux/init.h> 43#include <linux/init.h>
44#include <linux/blkdev.h> 44#include <linux/blkdev.h>
45#include <linux/parser.h> 45#include <linux/parser.h>
46#include <linux/random.h>
47#include <linux/crc32.h> 46#include <linux/crc32.h>
48#include <linux/vfs.h> 47#include <linux/vfs.h>
49#include <linux/writeback.h> 48#include <linux/writeback.h>
@@ -72,23 +71,23 @@ struct kmem_cache *nilfs_transaction_cachep;
72struct kmem_cache *nilfs_segbuf_cachep; 71struct kmem_cache *nilfs_segbuf_cachep;
73struct kmem_cache *nilfs_btree_path_cache; 72struct kmem_cache *nilfs_btree_path_cache;
74 73
75static int nilfs_setup_super(struct nilfs_sb_info *sbi, int is_mount); 74static int nilfs_setup_super(struct super_block *sb, int is_mount);
76static int nilfs_remount(struct super_block *sb, int *flags, char *data); 75static int nilfs_remount(struct super_block *sb, int *flags, char *data);
77 76
78static void nilfs_set_error(struct nilfs_sb_info *sbi) 77static void nilfs_set_error(struct super_block *sb)
79{ 78{
80 struct the_nilfs *nilfs = sbi->s_nilfs; 79 struct the_nilfs *nilfs = sb->s_fs_info;
81 struct nilfs_super_block **sbp; 80 struct nilfs_super_block **sbp;
82 81
83 down_write(&nilfs->ns_sem); 82 down_write(&nilfs->ns_sem);
84 if (!(nilfs->ns_mount_state & NILFS_ERROR_FS)) { 83 if (!(nilfs->ns_mount_state & NILFS_ERROR_FS)) {
85 nilfs->ns_mount_state |= NILFS_ERROR_FS; 84 nilfs->ns_mount_state |= NILFS_ERROR_FS;
86 sbp = nilfs_prepare_super(sbi, 0); 85 sbp = nilfs_prepare_super(sb, 0);
87 if (likely(sbp)) { 86 if (likely(sbp)) {
88 sbp[0]->s_state |= cpu_to_le16(NILFS_ERROR_FS); 87 sbp[0]->s_state |= cpu_to_le16(NILFS_ERROR_FS);
89 if (sbp[1]) 88 if (sbp[1])
90 sbp[1]->s_state |= cpu_to_le16(NILFS_ERROR_FS); 89 sbp[1]->s_state |= cpu_to_le16(NILFS_ERROR_FS);
91 nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL); 90 nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL);
92 } 91 }
93 } 92 }
94 up_write(&nilfs->ns_sem); 93 up_write(&nilfs->ns_sem);
@@ -109,7 +108,7 @@ static void nilfs_set_error(struct nilfs_sb_info *sbi)
109void nilfs_error(struct super_block *sb, const char *function, 108void nilfs_error(struct super_block *sb, const char *function,
110 const char *fmt, ...) 109 const char *fmt, ...)
111{ 110{
112 struct nilfs_sb_info *sbi = NILFS_SB(sb); 111 struct the_nilfs *nilfs = sb->s_fs_info;
113 struct va_format vaf; 112 struct va_format vaf;
114 va_list args; 113 va_list args;
115 114
@@ -124,15 +123,15 @@ void nilfs_error(struct super_block *sb, const char *function,
124 va_end(args); 123 va_end(args);
125 124
126 if (!(sb->s_flags & MS_RDONLY)) { 125 if (!(sb->s_flags & MS_RDONLY)) {
127 nilfs_set_error(sbi); 126 nilfs_set_error(sb);
128 127
129 if (nilfs_test_opt(sbi, ERRORS_RO)) { 128 if (nilfs_test_opt(nilfs, ERRORS_RO)) {
130 printk(KERN_CRIT "Remounting filesystem read-only\n"); 129 printk(KERN_CRIT "Remounting filesystem read-only\n");
131 sb->s_flags |= MS_RDONLY; 130 sb->s_flags |= MS_RDONLY;
132 } 131 }
133 } 132 }
134 133
135 if (nilfs_test_opt(sbi, ERRORS_PANIC)) 134 if (nilfs_test_opt(nilfs, ERRORS_PANIC))
136 panic("NILFS (device %s): panic forced after error\n", 135 panic("NILFS (device %s): panic forced after error\n",
137 sb->s_id); 136 sb->s_id);
138} 137}
@@ -189,14 +188,14 @@ void nilfs_destroy_inode(struct inode *inode)
189 call_rcu(&inode->i_rcu, nilfs_i_callback); 188 call_rcu(&inode->i_rcu, nilfs_i_callback);
190} 189}
191 190
192static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag) 191static int nilfs_sync_super(struct super_block *sb, int flag)
193{ 192{
194 struct the_nilfs *nilfs = sbi->s_nilfs; 193 struct the_nilfs *nilfs = sb->s_fs_info;
195 int err; 194 int err;
196 195
197 retry: 196 retry:
198 set_buffer_dirty(nilfs->ns_sbh[0]); 197 set_buffer_dirty(nilfs->ns_sbh[0]);
199 if (nilfs_test_opt(sbi, BARRIER)) { 198 if (nilfs_test_opt(nilfs, BARRIER)) {
200 err = __sync_dirty_buffer(nilfs->ns_sbh[0], 199 err = __sync_dirty_buffer(nilfs->ns_sbh[0],
201 WRITE_SYNC | WRITE_FLUSH_FUA); 200 WRITE_SYNC | WRITE_FLUSH_FUA);
202 } else { 201 } else {
@@ -263,10 +262,10 @@ void nilfs_set_log_cursor(struct nilfs_super_block *sbp,
263 spin_unlock(&nilfs->ns_last_segment_lock); 262 spin_unlock(&nilfs->ns_last_segment_lock);
264} 263}
265 264
266struct nilfs_super_block **nilfs_prepare_super(struct nilfs_sb_info *sbi, 265struct nilfs_super_block **nilfs_prepare_super(struct super_block *sb,
267 int flip) 266 int flip)
268{ 267{
269 struct the_nilfs *nilfs = sbi->s_nilfs; 268 struct the_nilfs *nilfs = sb->s_fs_info;
270 struct nilfs_super_block **sbp = nilfs->ns_sbp; 269 struct nilfs_super_block **sbp = nilfs->ns_sbp;
271 270
272 /* nilfs->ns_sem must be locked by the caller. */ 271 /* nilfs->ns_sem must be locked by the caller. */
@@ -276,7 +275,7 @@ struct nilfs_super_block **nilfs_prepare_super(struct nilfs_sb_info *sbi,
276 memcpy(sbp[0], sbp[1], nilfs->ns_sbsize); 275 memcpy(sbp[0], sbp[1], nilfs->ns_sbsize);
277 } else { 276 } else {
278 printk(KERN_CRIT "NILFS: superblock broke on dev %s\n", 277 printk(KERN_CRIT "NILFS: superblock broke on dev %s\n",
279 sbi->s_super->s_id); 278 sb->s_id);
280 return NULL; 279 return NULL;
281 } 280 }
282 } else if (sbp[1] && 281 } else if (sbp[1] &&
@@ -290,9 +289,9 @@ struct nilfs_super_block **nilfs_prepare_super(struct nilfs_sb_info *sbi,
290 return sbp; 289 return sbp;
291} 290}
292 291
293int nilfs_commit_super(struct nilfs_sb_info *sbi, int flag) 292int nilfs_commit_super(struct super_block *sb, int flag)
294{ 293{
295 struct the_nilfs *nilfs = sbi->s_nilfs; 294 struct the_nilfs *nilfs = sb->s_fs_info;
296 struct nilfs_super_block **sbp = nilfs->ns_sbp; 295 struct nilfs_super_block **sbp = nilfs->ns_sbp;
297 time_t t; 296 time_t t;
298 297
@@ -312,27 +311,28 @@ int nilfs_commit_super(struct nilfs_sb_info *sbi, int flag)
312 nilfs->ns_sbsize)); 311 nilfs->ns_sbsize));
313 } 312 }
314 clear_nilfs_sb_dirty(nilfs); 313 clear_nilfs_sb_dirty(nilfs);
315 return nilfs_sync_super(sbi, flag); 314 return nilfs_sync_super(sb, flag);
316} 315}
317 316
318/** 317/**
319 * nilfs_cleanup_super() - write filesystem state for cleanup 318 * nilfs_cleanup_super() - write filesystem state for cleanup
320 * @sbi: nilfs_sb_info to be unmounted or degraded to read-only 319 * @sb: super block instance to be unmounted or degraded to read-only
321 * 320 *
322 * This function restores state flags in the on-disk super block. 321 * This function restores state flags in the on-disk super block.
323 * This will set the "clean" flag (i.e. NILFS_VALID_FS) unless the 322 * This will set the "clean" flag (i.e. NILFS_VALID_FS) unless the
324 * filesystem was marked unclean previously. 323 * filesystem was marked unclean previously.
325 */ 324 */
326int nilfs_cleanup_super(struct nilfs_sb_info *sbi) 325int nilfs_cleanup_super(struct super_block *sb)
327{ 326{
327 struct the_nilfs *nilfs = sb->s_fs_info;
328 struct nilfs_super_block **sbp; 328 struct nilfs_super_block **sbp;
329 int flag = NILFS_SB_COMMIT; 329 int flag = NILFS_SB_COMMIT;
330 int ret = -EIO; 330 int ret = -EIO;
331 331
332 sbp = nilfs_prepare_super(sbi, 0); 332 sbp = nilfs_prepare_super(sb, 0);
333 if (sbp) { 333 if (sbp) {
334 sbp[0]->s_state = cpu_to_le16(sbi->s_nilfs->ns_mount_state); 334 sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state);
335 nilfs_set_log_cursor(sbp[0], sbi->s_nilfs); 335 nilfs_set_log_cursor(sbp[0], nilfs);
336 if (sbp[1] && sbp[0]->s_last_cno == sbp[1]->s_last_cno) { 336 if (sbp[1] && sbp[0]->s_last_cno == sbp[1]->s_last_cno) {
337 /* 337 /*
338 * make the "clean" flag also to the opposite 338 * make the "clean" flag also to the opposite
@@ -342,21 +342,20 @@ int nilfs_cleanup_super(struct nilfs_sb_info *sbi)
342 sbp[1]->s_state = sbp[0]->s_state; 342 sbp[1]->s_state = sbp[0]->s_state;
343 flag = NILFS_SB_COMMIT_ALL; 343 flag = NILFS_SB_COMMIT_ALL;
344 } 344 }
345 ret = nilfs_commit_super(sbi, flag); 345 ret = nilfs_commit_super(sb, flag);
346 } 346 }
347 return ret; 347 return ret;
348} 348}
349 349
350static void nilfs_put_super(struct super_block *sb) 350static void nilfs_put_super(struct super_block *sb)
351{ 351{
352 struct nilfs_sb_info *sbi = NILFS_SB(sb); 352 struct the_nilfs *nilfs = sb->s_fs_info;
353 struct the_nilfs *nilfs = sbi->s_nilfs;
354 353
355 nilfs_detach_segment_constructor(sbi); 354 nilfs_detach_log_writer(sb);
356 355
357 if (!(sb->s_flags & MS_RDONLY)) { 356 if (!(sb->s_flags & MS_RDONLY)) {
358 down_write(&nilfs->ns_sem); 357 down_write(&nilfs->ns_sem);
359 nilfs_cleanup_super(sbi); 358 nilfs_cleanup_super(sb);
360 up_write(&nilfs->ns_sem); 359 up_write(&nilfs->ns_sem);
361 } 360 }
362 361
@@ -365,15 +364,12 @@ static void nilfs_put_super(struct super_block *sb)
365 iput(nilfs->ns_dat); 364 iput(nilfs->ns_dat);
366 365
367 destroy_nilfs(nilfs); 366 destroy_nilfs(nilfs);
368 sbi->s_super = NULL;
369 sb->s_fs_info = NULL; 367 sb->s_fs_info = NULL;
370 kfree(sbi);
371} 368}
372 369
373static int nilfs_sync_fs(struct super_block *sb, int wait) 370static int nilfs_sync_fs(struct super_block *sb, int wait)
374{ 371{
375 struct nilfs_sb_info *sbi = NILFS_SB(sb); 372 struct the_nilfs *nilfs = sb->s_fs_info;
376 struct the_nilfs *nilfs = sbi->s_nilfs;
377 struct nilfs_super_block **sbp; 373 struct nilfs_super_block **sbp;
378 int err = 0; 374 int err = 0;
379 375
@@ -383,10 +379,10 @@ static int nilfs_sync_fs(struct super_block *sb, int wait)
383 379
384 down_write(&nilfs->ns_sem); 380 down_write(&nilfs->ns_sem);
385 if (nilfs_sb_dirty(nilfs)) { 381 if (nilfs_sb_dirty(nilfs)) {
386 sbp = nilfs_prepare_super(sbi, nilfs_sb_will_flip(nilfs)); 382 sbp = nilfs_prepare_super(sb, nilfs_sb_will_flip(nilfs));
387 if (likely(sbp)) { 383 if (likely(sbp)) {
388 nilfs_set_log_cursor(sbp[0], nilfs); 384 nilfs_set_log_cursor(sbp[0], nilfs);
389 nilfs_commit_super(sbi, NILFS_SB_COMMIT); 385 nilfs_commit_super(sb, NILFS_SB_COMMIT);
390 } 386 }
391 } 387 }
392 up_write(&nilfs->ns_sem); 388 up_write(&nilfs->ns_sem);
@@ -394,10 +390,10 @@ static int nilfs_sync_fs(struct super_block *sb, int wait)
394 return err; 390 return err;
395} 391}
396 392
397int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno, int curr_mnt, 393int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt,
398 struct nilfs_root **rootp) 394 struct nilfs_root **rootp)
399{ 395{
400 struct the_nilfs *nilfs = sbi->s_nilfs; 396 struct the_nilfs *nilfs = sb->s_fs_info;
401 struct nilfs_root *root; 397 struct nilfs_root *root;
402 struct nilfs_checkpoint *raw_cp; 398 struct nilfs_checkpoint *raw_cp;
403 struct buffer_head *bh_cp; 399 struct buffer_head *bh_cp;
@@ -426,7 +422,7 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno, int curr_mnt,
426 goto failed; 422 goto failed;
427 } 423 }
428 424
429 err = nilfs_ifile_read(sbi->s_super, root, nilfs->ns_inode_size, 425 err = nilfs_ifile_read(sb, root, nilfs->ns_inode_size,
430 &raw_cp->cp_ifile_inode, &root->ifile); 426 &raw_cp->cp_ifile_inode, &root->ifile);
431 if (err) 427 if (err)
432 goto failed_bh; 428 goto failed_bh;
@@ -450,8 +446,7 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno, int curr_mnt,
450 446
451static int nilfs_freeze(struct super_block *sb) 447static int nilfs_freeze(struct super_block *sb)
452{ 448{
453 struct nilfs_sb_info *sbi = NILFS_SB(sb); 449 struct the_nilfs *nilfs = sb->s_fs_info;
454 struct the_nilfs *nilfs = sbi->s_nilfs;
455 int err; 450 int err;
456 451
457 if (sb->s_flags & MS_RDONLY) 452 if (sb->s_flags & MS_RDONLY)
@@ -459,21 +454,20 @@ static int nilfs_freeze(struct super_block *sb)
459 454
460 /* Mark super block clean */ 455 /* Mark super block clean */
461 down_write(&nilfs->ns_sem); 456 down_write(&nilfs->ns_sem);
462 err = nilfs_cleanup_super(sbi); 457 err = nilfs_cleanup_super(sb);
463 up_write(&nilfs->ns_sem); 458 up_write(&nilfs->ns_sem);
464 return err; 459 return err;
465} 460}
466 461
467static int nilfs_unfreeze(struct super_block *sb) 462static int nilfs_unfreeze(struct super_block *sb)
468{ 463{
469 struct nilfs_sb_info *sbi = NILFS_SB(sb); 464 struct the_nilfs *nilfs = sb->s_fs_info;
470 struct the_nilfs *nilfs = sbi->s_nilfs;
471 465
472 if (sb->s_flags & MS_RDONLY) 466 if (sb->s_flags & MS_RDONLY)
473 return 0; 467 return 0;
474 468
475 down_write(&nilfs->ns_sem); 469 down_write(&nilfs->ns_sem);
476 nilfs_setup_super(sbi, false); 470 nilfs_setup_super(sb, false);
477 up_write(&nilfs->ns_sem); 471 up_write(&nilfs->ns_sem);
478 return 0; 472 return 0;
479} 473}
@@ -530,22 +524,22 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
530static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs) 524static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
531{ 525{
532 struct super_block *sb = vfs->mnt_sb; 526 struct super_block *sb = vfs->mnt_sb;
533 struct nilfs_sb_info *sbi = NILFS_SB(sb); 527 struct the_nilfs *nilfs = sb->s_fs_info;
534 struct nilfs_root *root = NILFS_I(vfs->mnt_root->d_inode)->i_root; 528 struct nilfs_root *root = NILFS_I(vfs->mnt_root->d_inode)->i_root;
535 529
536 if (!nilfs_test_opt(sbi, BARRIER)) 530 if (!nilfs_test_opt(nilfs, BARRIER))
537 seq_puts(seq, ",nobarrier"); 531 seq_puts(seq, ",nobarrier");
538 if (root->cno != NILFS_CPTREE_CURRENT_CNO) 532 if (root->cno != NILFS_CPTREE_CURRENT_CNO)
539 seq_printf(seq, ",cp=%llu", (unsigned long long)root->cno); 533 seq_printf(seq, ",cp=%llu", (unsigned long long)root->cno);
540 if (nilfs_test_opt(sbi, ERRORS_PANIC)) 534 if (nilfs_test_opt(nilfs, ERRORS_PANIC))
541 seq_puts(seq, ",errors=panic"); 535 seq_puts(seq, ",errors=panic");
542 if (nilfs_test_opt(sbi, ERRORS_CONT)) 536 if (nilfs_test_opt(nilfs, ERRORS_CONT))
543 seq_puts(seq, ",errors=continue"); 537 seq_puts(seq, ",errors=continue");
544 if (nilfs_test_opt(sbi, STRICT_ORDER)) 538 if (nilfs_test_opt(nilfs, STRICT_ORDER))
545 seq_puts(seq, ",order=strict"); 539 seq_puts(seq, ",order=strict");
546 if (nilfs_test_opt(sbi, NORECOVERY)) 540 if (nilfs_test_opt(nilfs, NORECOVERY))
547 seq_puts(seq, ",norecovery"); 541 seq_puts(seq, ",norecovery");
548 if (nilfs_test_opt(sbi, DISCARD)) 542 if (nilfs_test_opt(nilfs, DISCARD))
549 seq_puts(seq, ",discard"); 543 seq_puts(seq, ",discard");
550 544
551 return 0; 545 return 0;
@@ -594,7 +588,7 @@ static match_table_t tokens = {
594 588
595static int parse_options(char *options, struct super_block *sb, int is_remount) 589static int parse_options(char *options, struct super_block *sb, int is_remount)
596{ 590{
597 struct nilfs_sb_info *sbi = NILFS_SB(sb); 591 struct the_nilfs *nilfs = sb->s_fs_info;
598 char *p; 592 char *p;
599 substring_t args[MAX_OPT_ARGS]; 593 substring_t args[MAX_OPT_ARGS];
600 594
@@ -609,29 +603,29 @@ static int parse_options(char *options, struct super_block *sb, int is_remount)
609 token = match_token(p, tokens, args); 603 token = match_token(p, tokens, args);
610 switch (token) { 604 switch (token) {
611 case Opt_barrier: 605 case Opt_barrier:
612 nilfs_set_opt(sbi, BARRIER); 606 nilfs_set_opt(nilfs, BARRIER);
613 break; 607 break;
614 case Opt_nobarrier: 608 case Opt_nobarrier:
615 nilfs_clear_opt(sbi, BARRIER); 609 nilfs_clear_opt(nilfs, BARRIER);
616 break; 610 break;
617 case Opt_order: 611 case Opt_order:
618 if (strcmp(args[0].from, "relaxed") == 0) 612 if (strcmp(args[0].from, "relaxed") == 0)
619 /* Ordered data semantics */ 613 /* Ordered data semantics */
620 nilfs_clear_opt(sbi, STRICT_ORDER); 614 nilfs_clear_opt(nilfs, STRICT_ORDER);
621 else if (strcmp(args[0].from, "strict") == 0) 615 else if (strcmp(args[0].from, "strict") == 0)
622 /* Strict in-order semantics */ 616 /* Strict in-order semantics */
623 nilfs_set_opt(sbi, STRICT_ORDER); 617 nilfs_set_opt(nilfs, STRICT_ORDER);
624 else 618 else
625 return 0; 619 return 0;
626 break; 620 break;
627 case Opt_err_panic: 621 case Opt_err_panic:
628 nilfs_write_opt(sbi, ERROR_MODE, ERRORS_PANIC); 622 nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_PANIC);
629 break; 623 break;
630 case Opt_err_ro: 624 case Opt_err_ro:
631 nilfs_write_opt(sbi, ERROR_MODE, ERRORS_RO); 625 nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_RO);
632 break; 626 break;
633 case Opt_err_cont: 627 case Opt_err_cont:
634 nilfs_write_opt(sbi, ERROR_MODE, ERRORS_CONT); 628 nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_CONT);
635 break; 629 break;
636 case Opt_snapshot: 630 case Opt_snapshot:
637 if (is_remount) { 631 if (is_remount) {
@@ -642,13 +636,13 @@ static int parse_options(char *options, struct super_block *sb, int is_remount)
642 } 636 }
643 break; 637 break;
644 case Opt_norecovery: 638 case Opt_norecovery:
645 nilfs_set_opt(sbi, NORECOVERY); 639 nilfs_set_opt(nilfs, NORECOVERY);
646 break; 640 break;
647 case Opt_discard: 641 case Opt_discard:
648 nilfs_set_opt(sbi, DISCARD); 642 nilfs_set_opt(nilfs, DISCARD);
649 break; 643 break;
650 case Opt_nodiscard: 644 case Opt_nodiscard:
651 nilfs_clear_opt(sbi, DISCARD); 645 nilfs_clear_opt(nilfs, DISCARD);
652 break; 646 break;
653 default: 647 default:
654 printk(KERN_ERR 648 printk(KERN_ERR
@@ -660,22 +654,24 @@ static int parse_options(char *options, struct super_block *sb, int is_remount)
660} 654}
661 655
662static inline void 656static inline void
663nilfs_set_default_options(struct nilfs_sb_info *sbi, 657nilfs_set_default_options(struct super_block *sb,
664 struct nilfs_super_block *sbp) 658 struct nilfs_super_block *sbp)
665{ 659{
666 sbi->s_mount_opt = 660 struct the_nilfs *nilfs = sb->s_fs_info;
661
662 nilfs->ns_mount_opt =
667 NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER; 663 NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER;
668} 664}
669 665
670static int nilfs_setup_super(struct nilfs_sb_info *sbi, int is_mount) 666static int nilfs_setup_super(struct super_block *sb, int is_mount)
671{ 667{
672 struct the_nilfs *nilfs = sbi->s_nilfs; 668 struct the_nilfs *nilfs = sb->s_fs_info;
673 struct nilfs_super_block **sbp; 669 struct nilfs_super_block **sbp;
674 int max_mnt_count; 670 int max_mnt_count;
675 int mnt_count; 671 int mnt_count;
676 672
677 /* nilfs->ns_sem must be locked by the caller. */ 673 /* nilfs->ns_sem must be locked by the caller. */
678 sbp = nilfs_prepare_super(sbi, 0); 674 sbp = nilfs_prepare_super(sb, 0);
679 if (!sbp) 675 if (!sbp)
680 return -EIO; 676 return -EIO;
681 677
@@ -706,7 +702,7 @@ skip_mount_setup:
706 /* synchronize sbp[1] with sbp[0] */ 702 /* synchronize sbp[1] with sbp[0] */
707 if (sbp[1]) 703 if (sbp[1])
708 memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); 704 memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
709 return nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL); 705 return nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL);
710} 706}
711 707
712struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb, 708struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb,
@@ -727,7 +723,7 @@ int nilfs_store_magic_and_option(struct super_block *sb,
727 struct nilfs_super_block *sbp, 723 struct nilfs_super_block *sbp,
728 char *data) 724 char *data)
729{ 725{
730 struct nilfs_sb_info *sbi = NILFS_SB(sb); 726 struct the_nilfs *nilfs = sb->s_fs_info;
731 727
732 sb->s_magic = le16_to_cpu(sbp->s_magic); 728 sb->s_magic = le16_to_cpu(sbp->s_magic);
733 729
@@ -736,12 +732,12 @@ int nilfs_store_magic_and_option(struct super_block *sb,
736 sb->s_flags |= MS_NOATIME; 732 sb->s_flags |= MS_NOATIME;
737#endif 733#endif
738 734
739 nilfs_set_default_options(sbi, sbp); 735 nilfs_set_default_options(sb, sbp);
740 736
741 sbi->s_resuid = le16_to_cpu(sbp->s_def_resuid); 737 nilfs->ns_resuid = le16_to_cpu(sbp->s_def_resuid);
742 sbi->s_resgid = le16_to_cpu(sbp->s_def_resgid); 738 nilfs->ns_resgid = le16_to_cpu(sbp->s_def_resgid);
743 sbi->s_interval = le32_to_cpu(sbp->s_c_interval); 739 nilfs->ns_interval = le32_to_cpu(sbp->s_c_interval);
744 sbi->s_watermark = le32_to_cpu(sbp->s_c_block_max); 740 nilfs->ns_watermark = le32_to_cpu(sbp->s_c_block_max);
745 741
746 return !parse_options(data, sb, 0) ? -EINVAL : 0 ; 742 return !parse_options(data, sb, 0) ? -EINVAL : 0 ;
747} 743}
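nilfs_store_magic_and_option() now copies the on-disk defaults straight into the_nilfs; the le16_to_cpu()/le32_to_cpu() conversions matter because the super block is stored little-endian regardless of host byte order. A user-space illustration of the same decoding, with hand-rolled helpers standing in for the kernel's conversion macros (the sample byte values are made up for the demo):

#include <stdint.h>
#include <stdio.h>

/* Hand-rolled equivalents of le16_to_cpu()/le32_to_cpu(). */
static uint16_t le16_to_host(const uint8_t *p)
{
        return (uint16_t)(p[0] | p[1] << 8);
}

static uint32_t le32_to_host(const uint8_t *p)
{
        return (uint32_t)p[0] | (uint32_t)p[1] << 8 |
               (uint32_t)p[2] << 16 | (uint32_t)p[3] << 24;
}

int main(void)
{
        /* s_def_resuid = 0 and s_c_interval = 5, as raw little-endian bytes */
        uint8_t resuid[2] = { 0x00, 0x00 };
        uint8_t interval[4] = { 0x05, 0x00, 0x00, 0x00 };

        printf("ns_resuid=%u ns_interval=%u\n",
               le16_to_host(resuid), le32_to_host(interval));
        return 0;
}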
@@ -822,7 +818,7 @@ static int nilfs_get_root_dentry(struct super_block *sb,
822static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, 818static int nilfs_attach_snapshot(struct super_block *s, __u64 cno,
823 struct dentry **root_dentry) 819 struct dentry **root_dentry)
824{ 820{
825 struct the_nilfs *nilfs = NILFS_SB(s)->s_nilfs; 821 struct the_nilfs *nilfs = s->s_fs_info;
826 struct nilfs_root *root; 822 struct nilfs_root *root;
827 int ret; 823 int ret;
828 824
@@ -840,7 +836,7 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno,
840 goto out; 836 goto out;
841 } 837 }
842 838
843 ret = nilfs_attach_checkpoint(NILFS_SB(s), cno, false, &root); 839 ret = nilfs_attach_checkpoint(s, cno, false, &root);
844 if (ret) { 840 if (ret) {
845 printk(KERN_ERR "NILFS: error loading snapshot " 841 printk(KERN_ERR "NILFS: error loading snapshot "
846 "(checkpoint number=%llu).\n", 842 "(checkpoint number=%llu).\n",
@@ -874,7 +870,7 @@ static int nilfs_try_to_shrink_tree(struct dentry *root_dentry)
874 870
875int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno) 871int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno)
876{ 872{
877 struct the_nilfs *nilfs = NILFS_SB(sb)->s_nilfs; 873 struct the_nilfs *nilfs = sb->s_fs_info;
878 struct nilfs_root *root; 874 struct nilfs_root *root;
879 struct inode *inode; 875 struct inode *inode;
880 struct dentry *dentry; 876 struct dentry *dentry;
@@ -887,7 +883,7 @@ int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno)
887 return true; /* protect recent checkpoints */ 883 return true; /* protect recent checkpoints */
888 884
889 ret = false; 885 ret = false;
890 root = nilfs_lookup_root(NILFS_SB(sb)->s_nilfs, cno); 886 root = nilfs_lookup_root(nilfs, cno);
891 if (root) { 887 if (root) {
892 inode = nilfs_ilookup(sb, root, NILFS_ROOT_INO); 888 inode = nilfs_ilookup(sb, root, NILFS_ROOT_INO);
893 if (inode) { 889 if (inode) {
@@ -917,43 +913,21 @@ static int
917nilfs_fill_super(struct super_block *sb, void *data, int silent) 913nilfs_fill_super(struct super_block *sb, void *data, int silent)
918{ 914{
919 struct the_nilfs *nilfs; 915 struct the_nilfs *nilfs;
920 struct nilfs_sb_info *sbi;
921 struct nilfs_root *fsroot; 916 struct nilfs_root *fsroot;
922 struct backing_dev_info *bdi; 917 struct backing_dev_info *bdi;
923 __u64 cno; 918 __u64 cno;
924 int err; 919 int err;
925 920
926 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 921 nilfs = alloc_nilfs(sb->s_bdev);
927 if (!sbi) 922 if (!nilfs)
928 return -ENOMEM; 923 return -ENOMEM;
929 924
930 sb->s_fs_info = sbi; 925 sb->s_fs_info = nilfs;
931 sbi->s_super = sb;
932
933 nilfs = alloc_nilfs(sb->s_bdev);
934 if (!nilfs) {
935 err = -ENOMEM;
936 goto failed_sbi;
937 }
938 sbi->s_nilfs = nilfs;
939 926
940 err = init_nilfs(nilfs, sbi, (char *)data); 927 err = init_nilfs(nilfs, sb, (char *)data);
941 if (err) 928 if (err)
942 goto failed_nilfs; 929 goto failed_nilfs;
943 930
944 spin_lock_init(&sbi->s_inode_lock);
945 INIT_LIST_HEAD(&sbi->s_dirty_files);
946
947 /*
948 * Following initialization is overlapped because
949 * nilfs_sb_info structure has been cleared at the beginning.
950 * But we reserve them to keep our interest and make ready
951 * for the future change.
952 */
953 get_random_bytes(&sbi->s_next_generation,
954 sizeof(sbi->s_next_generation));
955 spin_lock_init(&sbi->s_next_gen_lock);
956
957 sb->s_op = &nilfs_sops; 931 sb->s_op = &nilfs_sops;
958 sb->s_export_op = &nilfs_export_ops; 932 sb->s_export_op = &nilfs_export_ops;
959 sb->s_root = NULL; 933 sb->s_root = NULL;
@@ -962,12 +936,12 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
962 bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; 936 bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
963 sb->s_bdi = bdi ? : &default_backing_dev_info; 937 sb->s_bdi = bdi ? : &default_backing_dev_info;
964 938
965 err = load_nilfs(nilfs, sbi); 939 err = load_nilfs(nilfs, sb);
966 if (err) 940 if (err)
967 goto failed_nilfs; 941 goto failed_nilfs;
968 942
969 cno = nilfs_last_cno(nilfs); 943 cno = nilfs_last_cno(nilfs);
970 err = nilfs_attach_checkpoint(sbi, cno, true, &fsroot); 944 err = nilfs_attach_checkpoint(sb, cno, true, &fsroot);
971 if (err) { 945 if (err) {
972 printk(KERN_ERR "NILFS: error loading last checkpoint " 946 printk(KERN_ERR "NILFS: error loading last checkpoint "
973 "(checkpoint number=%llu).\n", (unsigned long long)cno); 947 "(checkpoint number=%llu).\n", (unsigned long long)cno);
@@ -975,7 +949,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
975 } 949 }
976 950
977 if (!(sb->s_flags & MS_RDONLY)) { 951 if (!(sb->s_flags & MS_RDONLY)) {
978 err = nilfs_attach_segment_constructor(sbi, fsroot); 952 err = nilfs_attach_log_writer(sb, fsroot);
979 if (err) 953 if (err)
980 goto failed_checkpoint; 954 goto failed_checkpoint;
981 } 955 }
@@ -988,14 +962,14 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
988 962
989 if (!(sb->s_flags & MS_RDONLY)) { 963 if (!(sb->s_flags & MS_RDONLY)) {
990 down_write(&nilfs->ns_sem); 964 down_write(&nilfs->ns_sem);
991 nilfs_setup_super(sbi, true); 965 nilfs_setup_super(sb, true);
992 up_write(&nilfs->ns_sem); 966 up_write(&nilfs->ns_sem);
993 } 967 }
994 968
995 return 0; 969 return 0;
996 970
997 failed_segctor: 971 failed_segctor:
998 nilfs_detach_segment_constructor(sbi); 972 nilfs_detach_log_writer(sb);
999 973
1000 failed_checkpoint: 974 failed_checkpoint:
1001 nilfs_put_root(fsroot); 975 nilfs_put_root(fsroot);
@@ -1007,23 +981,18 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
1007 981
1008 failed_nilfs: 982 failed_nilfs:
1009 destroy_nilfs(nilfs); 983 destroy_nilfs(nilfs);
1010
1011 failed_sbi:
1012 sb->s_fs_info = NULL;
1013 kfree(sbi);
1014 return err; 984 return err;
1015} 985}
1016 986
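With nilfs_sb_info gone, nilfs_fill_super() becomes a straight line: allocate the_nilfs, publish it in sb->s_fs_info, then init, load, and attach, with a single unwind label where the old failed_sbi step used to free the intermediate object. A toy model of that unwind structure; the four function names are real, but their bodies here are stubs and the checkpoint/log-writer steps are trimmed:

#include <stdlib.h>

struct the_nilfs { int loaded; };

static struct the_nilfs *alloc_nilfs(void) { return calloc(1, sizeof(struct the_nilfs)); }
static void destroy_nilfs(struct the_nilfs *n) { free(n); }
static int init_nilfs(struct the_nilfs *n) { (void)n; return 0; }
static int load_nilfs(struct the_nilfs *n) { n->loaded = 1; return 0; }

/* Mirrors the post-patch shape: one label, one teardown call. */
static int fill_super(void **s_fs_info)
{
        struct the_nilfs *nilfs = alloc_nilfs();
        int err;

        if (!nilfs)
                return -12;              /* -ENOMEM */
        *s_fs_info = nilfs;

        err = init_nilfs(nilfs);
        if (err)
                goto failed_nilfs;
        err = load_nilfs(nilfs);
        if (err)
                goto failed_nilfs;
        return 0;

failed_nilfs:
        destroy_nilfs(nilfs);
        return err;                      /* the VFS discards the sb on failure */
}

int main(void)
{
        void *p = NULL;
        int err = fill_super(&p);

        if (!err)
                destroy_nilfs(p);        /* what nilfs_put_super() does at unmount */
        return err;
}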
1017static int nilfs_remount(struct super_block *sb, int *flags, char *data) 987static int nilfs_remount(struct super_block *sb, int *flags, char *data)
1018{ 988{
1019 struct nilfs_sb_info *sbi = NILFS_SB(sb); 989 struct the_nilfs *nilfs = sb->s_fs_info;
1020 struct the_nilfs *nilfs = sbi->s_nilfs;
1021 unsigned long old_sb_flags; 990 unsigned long old_sb_flags;
1022 unsigned long old_mount_opt; 991 unsigned long old_mount_opt;
1023 int err; 992 int err;
1024 993
1025 old_sb_flags = sb->s_flags; 994 old_sb_flags = sb->s_flags;
1026 old_mount_opt = sbi->s_mount_opt; 995 old_mount_opt = nilfs->ns_mount_opt;
1027 996
1028 if (!parse_options(data, sb, 1)) { 997 if (!parse_options(data, sb, 1)) {
1029 err = -EINVAL; 998 err = -EINVAL;
@@ -1043,8 +1012,8 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
1043 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) 1012 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
1044 goto out; 1013 goto out;
1045 if (*flags & MS_RDONLY) { 1014 if (*flags & MS_RDONLY) {
1046 /* Shutting down the segment constructor */ 1015 /* Shutting down the log writer */
1047 nilfs_detach_segment_constructor(sbi); 1016 nilfs_detach_log_writer(sb);
1048 sb->s_flags |= MS_RDONLY; 1017 sb->s_flags |= MS_RDONLY;
1049 1018
1050 /* 1019 /*
@@ -1052,7 +1021,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
1052 * the RDONLY flag and then mark the partition as valid again. 1021 * the RDONLY flag and then mark the partition as valid again.
1053 */ 1022 */
1054 down_write(&nilfs->ns_sem); 1023 down_write(&nilfs->ns_sem);
1055 nilfs_cleanup_super(sbi); 1024 nilfs_cleanup_super(sb);
1056 up_write(&nilfs->ns_sem); 1025 up_write(&nilfs->ns_sem);
1057 } else { 1026 } else {
1058 __u64 features; 1027 __u64 features;
@@ -1079,12 +1048,12 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
1079 sb->s_flags &= ~MS_RDONLY; 1048 sb->s_flags &= ~MS_RDONLY;
1080 1049
1081 root = NILFS_I(sb->s_root->d_inode)->i_root; 1050 root = NILFS_I(sb->s_root->d_inode)->i_root;
1082 err = nilfs_attach_segment_constructor(sbi, root); 1051 err = nilfs_attach_log_writer(sb, root);
1083 if (err) 1052 if (err)
1084 goto restore_opts; 1053 goto restore_opts;
1085 1054
1086 down_write(&nilfs->ns_sem); 1055 down_write(&nilfs->ns_sem);
1087 nilfs_setup_super(sbi, true); 1056 nilfs_setup_super(sb, true);
1088 up_write(&nilfs->ns_sem); 1057 up_write(&nilfs->ns_sem);
1089 } 1058 }
1090 out: 1059 out:
@@ -1092,13 +1061,12 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
1092 1061
1093 restore_opts: 1062 restore_opts:
1094 sb->s_flags = old_sb_flags; 1063 sb->s_flags = old_sb_flags;
1095 sbi->s_mount_opt = old_mount_opt; 1064 nilfs->ns_mount_opt = old_mount_opt;
1096 return err; 1065 return err;
1097} 1066}
1098 1067
1099struct nilfs_super_data { 1068struct nilfs_super_data {
1100 struct block_device *bdev; 1069 struct block_device *bdev;
1101 struct nilfs_sb_info *sbi;
1102 __u64 cno; 1070 __u64 cno;
1103 int flags; 1071 int flags;
1104}; 1072};
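nilfs_remount() keeps the classic snapshot-and-restore idiom, now against nilfs->ns_mount_opt instead of sbi->s_mount_opt: record sb->s_flags and the option mask up front, and roll both back on any failure. A compilable model of that idiom; remount() and bad_apply() below are illustrative, and only the two saved/restored fields correspond to the kernel code:

#include <stdio.h>

struct the_nilfs { unsigned long ns_mount_opt; };

/* 'apply' stands in for parse_options() plus the ro/rw transition work. */
static int remount(struct the_nilfs *nilfs, unsigned long *s_flags,
                   int (*apply)(struct the_nilfs *, unsigned long *))
{
        unsigned long old_sb_flags = *s_flags;              /* snapshot both ... */
        unsigned long old_mount_opt = nilfs->ns_mount_opt;  /* ... pieces of state */
        int err = apply(nilfs, s_flags);

        if (err) {                                          /* restore_opts: */
                *s_flags = old_sb_flags;
                nilfs->ns_mount_opt = old_mount_opt;
        }
        return err;
}

static int bad_apply(struct the_nilfs *n, unsigned long *f)
{
        n->ns_mount_opt = 0xdead;        /* scribble on the options, then fail */
        *f |= 1;
        return -22;                      /* -EINVAL */
}

int main(void)
{
        struct the_nilfs n = { .ns_mount_opt = 0x30 };
        unsigned long flags = 0;

        remount(&n, &flags, bad_apply);
        printf("opt=%#lx flags=%#lx\n", n.ns_mount_opt, flags); /* restored: 0x30 0 */
        return 0;
}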
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index ad4ac607cf57..d2acd1a651f3 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -25,6 +25,7 @@
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/blkdev.h> 26#include <linux/blkdev.h>
27#include <linux/backing-dev.h> 27#include <linux/backing-dev.h>
28#include <linux/random.h>
28#include <linux/crc32.h> 29#include <linux/crc32.h>
29#include "nilfs.h" 30#include "nilfs.h"
30#include "segment.h" 31#include "segment.h"
@@ -75,7 +76,10 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev)
75 nilfs->ns_bdev = bdev; 76 nilfs->ns_bdev = bdev;
76 atomic_set(&nilfs->ns_ndirtyblks, 0); 77 atomic_set(&nilfs->ns_ndirtyblks, 0);
77 init_rwsem(&nilfs->ns_sem); 78 init_rwsem(&nilfs->ns_sem);
79 INIT_LIST_HEAD(&nilfs->ns_dirty_files);
78 INIT_LIST_HEAD(&nilfs->ns_gc_inodes); 80 INIT_LIST_HEAD(&nilfs->ns_gc_inodes);
81 spin_lock_init(&nilfs->ns_inode_lock);
82 spin_lock_init(&nilfs->ns_next_gen_lock);
79 spin_lock_init(&nilfs->ns_last_segment_lock); 83 spin_lock_init(&nilfs->ns_last_segment_lock);
80 nilfs->ns_cptree = RB_ROOT; 84 nilfs->ns_cptree = RB_ROOT;
81 spin_lock_init(&nilfs->ns_cptree_lock); 85 spin_lock_init(&nilfs->ns_cptree_lock);
@@ -197,16 +201,16 @@ static int nilfs_store_log_cursor(struct the_nilfs *nilfs,
197/** 201/**
198 * load_nilfs - load and recover the nilfs 202 * load_nilfs - load and recover the nilfs
199 * @nilfs: the_nilfs structure to be loaded 203 * @nilfs: the_nilfs structure to be loaded
200 * @sbi: nilfs_sb_info used to recover past segment 204 * @sb: super block instance used to recover past segment
201 * 205 *
202 * load_nilfs() searches and loads the latest super root, 206 * load_nilfs() searches and loads the latest super root,
203 * attaches the last segment, and does recovery if needed. 207 * attaches the last segment, and does recovery if needed.
204 * The caller must call this exclusively for simultaneous mounts. 208 * The caller must call this exclusively for simultaneous mounts.
205 */ 209 */
206int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) 210int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
207{ 211{
208 struct nilfs_recovery_info ri; 212 struct nilfs_recovery_info ri;
209 unsigned int s_flags = sbi->s_super->s_flags; 213 unsigned int s_flags = sb->s_flags;
210 int really_read_only = bdev_read_only(nilfs->ns_bdev); 214 int really_read_only = bdev_read_only(nilfs->ns_bdev);
211 int valid_fs = nilfs_valid_fs(nilfs); 215 int valid_fs = nilfs_valid_fs(nilfs);
212 int err; 216 int err;
@@ -271,7 +275,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
271 goto scan_error; 275 goto scan_error;
272 } 276 }
273 277
274 err = nilfs_load_super_root(nilfs, sbi->s_super, ri.ri_super_root); 278 err = nilfs_load_super_root(nilfs, sb, ri.ri_super_root);
275 if (unlikely(err)) { 279 if (unlikely(err)) {
276 printk(KERN_ERR "NILFS: error loading super root.\n"); 280 printk(KERN_ERR "NILFS: error loading super root.\n");
277 goto failed; 281 goto failed;
@@ -283,7 +287,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
283 if (s_flags & MS_RDONLY) { 287 if (s_flags & MS_RDONLY) {
284 __u64 features; 288 __u64 features;
285 289
286 if (nilfs_test_opt(sbi, NORECOVERY)) { 290 if (nilfs_test_opt(nilfs, NORECOVERY)) {
287 printk(KERN_INFO "NILFS: norecovery option specified. " 291 printk(KERN_INFO "NILFS: norecovery option specified. "
288 "skipping roll-forward recovery\n"); 292 "skipping roll-forward recovery\n");
289 goto skip_recovery; 293 goto skip_recovery;
@@ -304,21 +308,21 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
304 err = -EROFS; 308 err = -EROFS;
305 goto failed_unload; 309 goto failed_unload;
306 } 310 }
307 sbi->s_super->s_flags &= ~MS_RDONLY; 311 sb->s_flags &= ~MS_RDONLY;
308 } else if (nilfs_test_opt(sbi, NORECOVERY)) { 312 } else if (nilfs_test_opt(nilfs, NORECOVERY)) {
309 printk(KERN_ERR "NILFS: recovery cancelled because norecovery " 313 printk(KERN_ERR "NILFS: recovery cancelled because norecovery "
310 "option was specified for a read/write mount\n"); 314 "option was specified for a read/write mount\n");
311 err = -EINVAL; 315 err = -EINVAL;
312 goto failed_unload; 316 goto failed_unload;
313 } 317 }
314 318
315 err = nilfs_salvage_orphan_logs(nilfs, sbi, &ri); 319 err = nilfs_salvage_orphan_logs(nilfs, sb, &ri);
316 if (err) 320 if (err)
317 goto failed_unload; 321 goto failed_unload;
318 322
319 down_write(&nilfs->ns_sem); 323 down_write(&nilfs->ns_sem);
320 nilfs->ns_mount_state |= NILFS_VALID_FS; /* set "clean" flag */ 324 nilfs->ns_mount_state |= NILFS_VALID_FS; /* set "clean" flag */
321 err = nilfs_cleanup_super(sbi); 325 err = nilfs_cleanup_super(sb);
322 up_write(&nilfs->ns_sem); 326 up_write(&nilfs->ns_sem);
323 327
324 if (err) { 328 if (err) {
@@ -330,7 +334,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
330 334
331 skip_recovery: 335 skip_recovery:
332 nilfs_clear_recovery_info(&ri); 336 nilfs_clear_recovery_info(&ri);
333 sbi->s_super->s_flags = s_flags; 337 sb->s_flags = s_flags;
334 return 0; 338 return 0;
335 339
336 scan_error: 340 scan_error:
@@ -344,7 +348,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
344 348
345 failed: 349 failed:
346 nilfs_clear_recovery_info(&ri); 350 nilfs_clear_recovery_info(&ri);
347 sbi->s_super->s_flags = s_flags; 351 sb->s_flags = s_flags;
348 return err; 352 return err;
349} 353}
350 354
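The load_nilfs() changes themselves are mechanical (sbi->s_super becomes sb), but the recovery policy around them is the subtle part: a dirty log can force even a read-only mount to drop MS_RDONLY temporarily for roll-forward, unless norecovery was given or the block device itself is read-only. A boolean model of the decision paths visible above; it condenses the ordering and touches no disk, so treat it as a sketch of outcomes rather than the kernel's exact control flow:

#include <stdio.h>

/* Returns 0 on success, negative errno-style codes otherwise. */
static int recovery_policy(int valid_fs, int mount_rdonly,
                           int norecovery, int bdev_rdonly)
{
        if (valid_fs)
                return 0;       /* clean log: nothing to replay */
        if (norecovery && !mount_rdonly)
                return -22;     /* -EINVAL: norecovery forbidden on rw mounts */
        if (norecovery)
                return 0;       /* ro + norecovery: skip roll-forward */
        if (bdev_rdonly)
                return -30;     /* -EROFS: cannot write recovered logs */
        return 0;               /* recover; ro mounts drop MS_RDONLY briefly */
}

int main(void)
{
        printf("%d\n", recovery_policy(0, 1, 0, 1));  /* dirty, ro mount, ro device */
        return 0;
}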
@@ -475,10 +479,13 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
475 return -EIO; 479 return -EIO;
476 } 480 }
477 printk(KERN_WARNING 481 printk(KERN_WARNING
478 "NILFS warning: unable to read primary superblock\n"); 482 "NILFS warning: unable to read primary superblock "
479 } else if (!sbp[1]) 483 "(blocksize = %d)\n", blocksize);
484 } else if (!sbp[1]) {
480 printk(KERN_WARNING 485 printk(KERN_WARNING
481 "NILFS warning: unable to read secondary superblock\n"); 486 "NILFS warning: unable to read secondary superblock "
487 "(blocksize = %d)\n", blocksize);
488 }
482 489
483 /* 490 /*
484 * Compare two super blocks and set 1 in swp if the secondary 491 * Compare two super blocks and set 1 in swp if the secondary
@@ -505,7 +512,7 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
505 512
506 if (!valid[!swp]) 513 if (!valid[!swp])
507 printk(KERN_WARNING "NILFS warning: broken superblock. " 514 printk(KERN_WARNING "NILFS warning: broken superblock. "
508 "using spare superblock.\n"); 515 "using spare superblock (blocksize = %d).\n", blocksize);
509 if (swp) 516 if (swp)
510 nilfs_swap_super_block(nilfs); 517 nilfs_swap_super_block(nilfs);
511 518
@@ -519,7 +526,6 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
519/** 526/**
520 * init_nilfs - initialize a NILFS instance. 527 * init_nilfs - initialize a NILFS instance.
521 * @nilfs: the_nilfs structure 528 * @nilfs: the_nilfs structure
522 * @sbi: nilfs_sb_info
523 * @sb: super block 529 * @sb: super block
524 * @data: mount options 530 * @data: mount options
525 * 531 *
@@ -530,9 +536,8 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
530 * Return Value: On success, 0 is returned. On error, a negative error 536 * Return Value: On success, 0 is returned. On error, a negative error
531 * code is returned. 537 * code is returned.
532 */ 538 */
533int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) 539int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
534{ 540{
535 struct super_block *sb = sbi->s_super;
536 struct nilfs_super_block *sbp; 541 struct nilfs_super_block *sbp;
537 int blocksize; 542 int blocksize;
538 int err; 543 int err;
@@ -588,6 +593,9 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data)
588 nilfs->ns_blocksize_bits = sb->s_blocksize_bits; 593 nilfs->ns_blocksize_bits = sb->s_blocksize_bits;
589 nilfs->ns_blocksize = blocksize; 594 nilfs->ns_blocksize = blocksize;
590 595
596 get_random_bytes(&nilfs->ns_next_generation,
597 sizeof(nilfs->ns_next_generation));
598
591 err = nilfs_store_disk_layout(nilfs, sbp); 599 err = nilfs_store_disk_layout(nilfs, sbp);
592 if (err) 600 if (err)
593 goto failed_sbh; 601 goto failed_sbh;
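init_nilfs() now seeds ns_next_generation, the successor of the per-sb s_next_generation, with random bytes. The typical consumer pattern, which NILFS's inode allocator follows outside this hunk, is to hand out the current value and post-increment under ns_next_gen_lock, so reused inode numbers still get fresh i_generation values; a user-space model of such a consumer, with the helper name invented for the sketch:

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static pthread_mutex_t next_gen_lock = PTHREAD_MUTEX_INITIALIZER; /* ns_next_gen_lock */
static uint32_t next_generation = 0x6b8b4567;  /* pretend get_random_bytes() seed */

static uint32_t new_inode_generation(void)
{
        uint32_t gen;

        pthread_mutex_lock(&next_gen_lock);
        gen = next_generation++;
        pthread_mutex_unlock(&next_gen_lock);
        return gen;
}

int main(void)
{
        printf("gen=%u gen=%u\n", new_inode_generation(), new_inode_generation());
        return 0;
}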
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index fd85e4c05c6b..f4968145c2a3 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -31,7 +31,8 @@
31#include <linux/blkdev.h> 31#include <linux/blkdev.h>
32#include <linux/backing-dev.h> 32#include <linux/backing-dev.h>
33#include <linux/slab.h> 33#include <linux/slab.h>
34#include "sb.h" 34
35struct nilfs_sc_info;
35 36
36/* the_nilfs struct */ 37/* the_nilfs struct */
37enum { 38enum {
@@ -65,13 +66,23 @@ enum {
65 * @ns_last_cno: checkpoint number of the latest segment 66 * @ns_last_cno: checkpoint number of the latest segment
66 * @ns_prot_seq: least sequence number of segments which must not be reclaimed 67 * @ns_prot_seq: least sequence number of segments which must not be reclaimed
67 * @ns_prev_seq: base sequence number used to decide if advance log cursor 68 * @ns_prev_seq: base sequence number used to decide if advance log cursor
68 * @ns_segctor_sem: segment constructor semaphore 70 * @ns_segctor_sem: semaphore protecting log writes
70 * @ns_segctor_sem: semaphore protecting log write
69 * @ns_dat: DAT file inode 71 * @ns_dat: DAT file inode
70 * @ns_cpfile: checkpoint file inode 72 * @ns_cpfile: checkpoint file inode
71 * @ns_sufile: segusage file inode 73 * @ns_sufile: segusage file inode
72 * @ns_cptree: rb-tree of all mounted checkpoints (nilfs_root) 74 * @ns_cptree: rb-tree of all mounted checkpoints (nilfs_root)
73 * @ns_cptree_lock: lock protecting @ns_cptree 75 * @ns_cptree_lock: lock protecting @ns_cptree
76 * @ns_dirty_files: list of dirty files
77 * @ns_inode_lock: lock protecting @ns_dirty_files
74 * @ns_gc_inodes: dummy inodes to keep live blocks 78 * @ns_gc_inodes: dummy inodes to keep live blocks
79 * @ns_next_generation: next generation number for inodes
80 * @ns_next_gen_lock: lock protecting @ns_next_generation
81 * @ns_mount_opt: mount options
82 * @ns_resuid: uid for reserved blocks
83 * @ns_resgid: gid for reserved blocks
84 * @ns_interval: checkpoint creation interval
85 * @ns_watermark: watermark for the number of dirty buffers
75 * @ns_blocksize_bits: bit length of block size 86 * @ns_blocksize_bits: bit length of block size
76 * @ns_blocksize: block size 87 * @ns_blocksize: block size
77 * @ns_nsegments: number of segments in filesystem 88 * @ns_nsegments: number of segments in filesystem
@@ -131,6 +142,7 @@ struct the_nilfs {
131 u64 ns_prot_seq; 142 u64 ns_prot_seq;
132 u64 ns_prev_seq; 143 u64 ns_prev_seq;
133 144
145 struct nilfs_sc_info *ns_writer;
134 struct rw_semaphore ns_segctor_sem; 146 struct rw_semaphore ns_segctor_sem;
135 147
136 /* 148 /*
@@ -145,9 +157,25 @@ struct the_nilfs {
145 struct rb_root ns_cptree; 157 struct rb_root ns_cptree;
146 spinlock_t ns_cptree_lock; 158 spinlock_t ns_cptree_lock;
147 159
160 /* Dirty inode list */
161 struct list_head ns_dirty_files;
162 spinlock_t ns_inode_lock;
163
148 /* GC inode list */ 164 /* GC inode list */
149 struct list_head ns_gc_inodes; 165 struct list_head ns_gc_inodes;
150 166
167 /* Inode allocator */
168 u32 ns_next_generation;
169 spinlock_t ns_next_gen_lock;
170
171 /* Mount options */
172 unsigned long ns_mount_opt;
173
174 uid_t ns_resuid;
175 gid_t ns_resgid;
176 unsigned long ns_interval;
177 unsigned long ns_watermark;
178
151 /* Disk layout information (static) */ 179 /* Disk layout information (static) */
152 unsigned int ns_blocksize_bits; 180 unsigned int ns_blocksize_bits;
153 unsigned int ns_blocksize; 181 unsigned int ns_blocksize;
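The ns_dirty_files list and ns_inode_lock added above absorb per-mount state that previously lived in nilfs_sb_info. A hedged sketch of the intended usage when an inode first becomes dirty (ii stands for a nilfs_inode_info; the inode-side field name is illustrative):

	spin_lock(&nilfs->ns_inode_lock);
	if (list_empty(&ii->i_dirty))		/* not yet on any dirty list */
		list_add_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
	spin_unlock(&nilfs->ns_inode_lock);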
@@ -180,6 +208,20 @@ THE_NILFS_FNS(DISCONTINUED, discontinued)
180THE_NILFS_FNS(GC_RUNNING, gc_running) 208THE_NILFS_FNS(GC_RUNNING, gc_running)
181THE_NILFS_FNS(SB_DIRTY, sb_dirty) 209THE_NILFS_FNS(SB_DIRTY, sb_dirty)
182 210
211/*
212 * Mount option operations
213 */
214#define nilfs_clear_opt(nilfs, opt) \
215 do { (nilfs)->ns_mount_opt &= ~NILFS_MOUNT_##opt; } while (0)
216#define nilfs_set_opt(nilfs, opt) \
217 do { (nilfs)->ns_mount_opt |= NILFS_MOUNT_##opt; } while (0)
218#define nilfs_test_opt(nilfs, opt) ((nilfs)->ns_mount_opt & NILFS_MOUNT_##opt)
219#define nilfs_write_opt(nilfs, mask, opt) \
220 do { (nilfs)->ns_mount_opt = \
221 (((nilfs)->ns_mount_opt & ~NILFS_MOUNT_##mask) | \
222 NILFS_MOUNT_##opt); \
223 } while (0)
224
183/** 225/**
184 * struct nilfs_root - nilfs root object 226 * struct nilfs_root - nilfs root object
185 * @cno: checkpoint number 227 * @cno: checkpoint number
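The mount-option helpers defined above paste the NILFS_MOUNT_ prefix onto a bare flag name via token concatenation, keeping call sites short. For example (illustrative only; BARRIER is one of the existing NILFS_MOUNT_* bits):

	if (nilfs_test_opt(nilfs, BARRIER))	/* tests NILFS_MOUNT_BARRIER */
		nilfs_clear_opt(nilfs, BARRIER);	/* e.g. remount with nobarrier */
	else
		nilfs_set_opt(nilfs, BARRIER);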
@@ -224,15 +266,14 @@ static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs)
224void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); 266void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64);
225struct the_nilfs *alloc_nilfs(struct block_device *bdev); 267struct the_nilfs *alloc_nilfs(struct block_device *bdev);
226void destroy_nilfs(struct the_nilfs *nilfs); 268void destroy_nilfs(struct the_nilfs *nilfs);
227int init_nilfs(struct the_nilfs *, struct nilfs_sb_info *, char *); 269int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data);
228int load_nilfs(struct the_nilfs *, struct nilfs_sb_info *); 270int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb);
229int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t); 271int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t);
230int nilfs_count_free_blocks(struct the_nilfs *, sector_t *); 272int nilfs_count_free_blocks(struct the_nilfs *, sector_t *);
231struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno); 273struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno);
232struct nilfs_root *nilfs_find_or_create_root(struct the_nilfs *nilfs, 274struct nilfs_root *nilfs_find_or_create_root(struct the_nilfs *nilfs,
233 __u64 cno); 275 __u64 cno);
234void nilfs_put_root(struct nilfs_root *root); 276void nilfs_put_root(struct nilfs_root *root);
235struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *, int, __u64);
236int nilfs_near_disk_full(struct the_nilfs *); 277int nilfs_near_disk_full(struct the_nilfs *);
237void nilfs_fall_back_super_block(struct the_nilfs *); 278void nilfs_fall_back_super_block(struct the_nilfs *);
238void nilfs_swap_super_block(struct the_nilfs *); 279void nilfs_swap_super_block(struct the_nilfs *);
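With nilfs_sb_info dropped from these interfaces, the mount path is expected to pass the VFS super_block straight through. A hedged sketch of the fill_super sequence under the new prototypes:

	err = init_nilfs(nilfs, sb, (char *)data);
	if (err)
		goto failed_nilfs;
	err = load_nilfs(nilfs, sb);	/* recovery and log-cursor setup */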
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 6b1305dc26c0..9fde1c00a296 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -164,7 +164,7 @@ static int process_access_response(struct fsnotify_group *group,
164 fd, response); 164 fd, response);
165 /* 165 /*
166 * make sure the response is valid, if invalid we do nothing and either 166 * make sure the response is valid, if invalid we do nothing and either
167 * userspace can send a valid responce or we will clean it up after the 167 * userspace can send a valid response or we will clean it up after the
168 * timeout 168 * timeout
169 */ 169 */
170 switch (response) { 170 switch (response) {
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 4c29fcf557d1..07ea8d3e6ea2 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -22,13 +22,14 @@
22#include <linux/module.h> 22#include <linux/module.h>
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/spinlock.h> 24#include <linux/spinlock.h>
25#include <linux/writeback.h> /* for inode_lock */
26 25
27#include <asm/atomic.h> 26#include <asm/atomic.h>
28 27
29#include <linux/fsnotify_backend.h> 28#include <linux/fsnotify_backend.h>
30#include "fsnotify.h" 29#include "fsnotify.h"
31 30
31#include "../internal.h"
32
32/* 33/*
33 * Recalculate the mask of events relevant to a given inode, with the inode locked. 34 * Recalculate the mask of events relevant to a given inode, with the inode locked.
34 */ 35 */
@@ -237,15 +238,14 @@ out:
237 * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. 238 * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
238 * @list: list of inodes being unmounted (sb->s_inodes) 239 * @list: list of inodes being unmounted (sb->s_inodes)
239 * 240 *
240 * Called with inode_lock held, protecting the unmounting super block's list 241 * Called during unmount with no locks held, so needs to be safe against
241 * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay. 242 * concurrent modifiers. We temporarily drop inode_sb_list_lock and CAN block.
242 * We temporarily drop inode_lock, however, and CAN block.
243 */ 243 */
244void fsnotify_unmount_inodes(struct list_head *list) 244void fsnotify_unmount_inodes(struct list_head *list)
245{ 245{
246 struct inode *inode, *next_i, *need_iput = NULL; 246 struct inode *inode, *next_i, *need_iput = NULL;
247 247
248 spin_lock(&inode_lock); 248 spin_lock(&inode_sb_list_lock);
249 list_for_each_entry_safe(inode, next_i, list, i_sb_list) { 249 list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
250 struct inode *need_iput_tmp; 250 struct inode *need_iput_tmp;
251 251
@@ -254,8 +254,11 @@ void fsnotify_unmount_inodes(struct list_head *list)
254 * I_WILL_FREE, or I_NEW which is fine because by that point 254 * I_WILL_FREE, or I_NEW which is fine because by that point
255 * the inode cannot have any associated watches. 255 * the inode cannot have any associated watches.
256 */ 256 */
257 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) 257 spin_lock(&inode->i_lock);
258 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
259 spin_unlock(&inode->i_lock);
258 continue; 260 continue;
261 }
259 262
260 /* 263 /*
261 * If i_count is zero, the inode cannot have any watches and 264 * If i_count is zero, the inode cannot have any watches and
@@ -263,8 +266,10 @@ void fsnotify_unmount_inodes(struct list_head *list)
263 * evict all inodes with zero i_count from icache which is 266 * evict all inodes with zero i_count from icache which is
264 * unnecessarily violent and may in fact be illegal to do. 267 * unnecessarily violent and may in fact be illegal to do.
265 */ 268 */
266 if (!atomic_read(&inode->i_count)) 269 if (!atomic_read(&inode->i_count)) {
270 spin_unlock(&inode->i_lock);
267 continue; 271 continue;
272 }
268 273
269 need_iput_tmp = need_iput; 274 need_iput_tmp = need_iput;
270 need_iput = NULL; 275 need_iput = NULL;
@@ -274,22 +279,25 @@ void fsnotify_unmount_inodes(struct list_head *list)
274 __iget(inode); 279 __iget(inode);
275 else 280 else
276 need_iput_tmp = NULL; 281 need_iput_tmp = NULL;
282 spin_unlock(&inode->i_lock);
277 283
278 /* In case the dropping of a reference would nuke next_i. */ 284 /* In case the dropping of a reference would nuke next_i. */
279 if ((&next_i->i_sb_list != list) && 285 if ((&next_i->i_sb_list != list) &&
280 atomic_read(&next_i->i_count) && 286 atomic_read(&next_i->i_count)) {
281 !(next_i->i_state & (I_FREEING | I_WILL_FREE))) { 287 spin_lock(&next_i->i_lock);
282 __iget(next_i); 288 if (!(next_i->i_state & (I_FREEING | I_WILL_FREE))) {
283 need_iput = next_i; 289 __iget(next_i);
290 need_iput = next_i;
291 }
292 spin_unlock(&next_i->i_lock);
284 } 293 }
285 294
286 /* 295 /*
287 * We can safely drop inode_lock here because we hold 296 * We can safely drop inode_sb_list_lock here because we hold
288 * references on both inode and next_i. Also no new inodes 297 * references on both inode and next_i. Also no new inodes
289 * will be added since the umount has begun. Finally, 298 * will be added since the umount has begun.
290 * iprune_mutex keeps shrink_icache_memory() away.
291 */ 299 */
292 spin_unlock(&inode_lock); 300 spin_unlock(&inode_sb_list_lock);
293 301
294 if (need_iput_tmp) 302 if (need_iput_tmp)
295 iput(need_iput_tmp); 303 iput(need_iput_tmp);
@@ -301,7 +309,7 @@ void fsnotify_unmount_inodes(struct list_head *list)
301 309
302 iput(inode); 310 iput(inode);
303 311
304 spin_lock(&inode_lock); 312 spin_lock(&inode_sb_list_lock);
305 } 313 }
306 spin_unlock(&inode_lock); 314 spin_unlock(&inode_sb_list_lock);
307} 315}
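This conversion follows the standard post-inode_lock walk: the sb inode list is guarded by inode_sb_list_lock, per-inode i_state checks move under inode->i_lock, and the inode is pinned before both locks are dropped for blocking work. Distilled below (a sketch; the real function additionally uses the _safe iterator and pins next_i so the cursor survives the unlocked window):

	spin_lock(&inode_sb_list_lock);
	list_for_each_entry(inode, list, i_sb_list) {
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW)) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		__iget(inode);			/* pin before dropping locks */
		spin_unlock(&inode->i_lock);
		spin_unlock(&inode_sb_list_lock);
		/* ... blocking work on the pinned inode ... */
		iput(inode);
		spin_lock(&inode_sb_list_lock);	/* re-take and continue */
	}
	spin_unlock(&inode_sb_list_lock);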
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index a91b69a6a291..e3cbd746f64a 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -194,10 +194,11 @@ static int idr_callback(int id, void *p, void *data)
194 194
195static void inotify_free_group_priv(struct fsnotify_group *group) 195static void inotify_free_group_priv(struct fsnotify_group *group)
196{ 196{
197 /* ideally the idr is empty and we won't hit the BUG in teh callback */ 197 /* ideally the idr is empty and we won't hit the BUG in the callback */
198 idr_for_each(&group->inotify_data.idr, idr_callback, group); 198 idr_for_each(&group->inotify_data.idr, idr_callback, group);
199 idr_remove_all(&group->inotify_data.idr); 199 idr_remove_all(&group->inotify_data.idr);
200 idr_destroy(&group->inotify_data.idr); 200 idr_destroy(&group->inotify_data.idr);
201 atomic_dec(&group->inotify_data.user->inotify_devs);
201 free_uid(group->inotify_data.user); 202 free_uid(group->inotify_data.user);
202} 203}
203 204
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index bd46e7c8a0ef..8445fbc8985c 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -290,7 +290,6 @@ static int inotify_fasync(int fd, struct file *file, int on)
290static int inotify_release(struct inode *ignored, struct file *file) 290static int inotify_release(struct inode *ignored, struct file *file)
291{ 291{
292 struct fsnotify_group *group = file->private_data; 292 struct fsnotify_group *group = file->private_data;
293 struct user_struct *user = group->inotify_data.user;
294 293
295 pr_debug("%s: group=%p\n", __func__, group); 294 pr_debug("%s: group=%p\n", __func__, group);
296 295
@@ -299,8 +298,6 @@ static int inotify_release(struct inode *ignored, struct file *file)
299 /* free this group, matching get was inotify_init->fsnotify_obtain_group */ 298 /* free this group, matching get was inotify_init->fsnotify_obtain_group */
300 fsnotify_put_group(group); 299 fsnotify_put_group(group);
301 300
302 atomic_dec(&user->inotify_devs);
303
304 return 0; 301 return 0;
305} 302}
306 303
@@ -697,7 +694,7 @@ retry:
697 return ret; 694 return ret;
698} 695}
699 696
700static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsigned int max_events) 697static struct fsnotify_group *inotify_new_group(unsigned int max_events)
701{ 698{
702 struct fsnotify_group *group; 699 struct fsnotify_group *group;
703 700
@@ -710,8 +707,14 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign
710 spin_lock_init(&group->inotify_data.idr_lock); 707 spin_lock_init(&group->inotify_data.idr_lock);
711 idr_init(&group->inotify_data.idr); 708 idr_init(&group->inotify_data.idr);
712 group->inotify_data.last_wd = 0; 709 group->inotify_data.last_wd = 0;
713 group->inotify_data.user = user;
714 group->inotify_data.fa = NULL; 710 group->inotify_data.fa = NULL;
711 group->inotify_data.user = get_current_user();
712
713 if (atomic_inc_return(&group->inotify_data.user->inotify_devs) >
714 inotify_max_user_instances) {
715 fsnotify_put_group(group);
716 return ERR_PTR(-EMFILE);
717 }
715 718
716 return group; 719 return group;
717} 720}
@@ -721,7 +724,6 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign
721SYSCALL_DEFINE1(inotify_init1, int, flags) 724SYSCALL_DEFINE1(inotify_init1, int, flags)
722{ 725{
723 struct fsnotify_group *group; 726 struct fsnotify_group *group;
724 struct user_struct *user;
725 int ret; 727 int ret;
726 728
727 /* Check the IN_* constants for consistency. */ 729 /* Check the IN_* constants for consistency. */
@@ -731,31 +733,16 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
731 if (flags & ~(IN_CLOEXEC | IN_NONBLOCK)) 733 if (flags & ~(IN_CLOEXEC | IN_NONBLOCK))
732 return -EINVAL; 734 return -EINVAL;
733 735
734 user = get_current_user();
735 if (unlikely(atomic_read(&user->inotify_devs) >=
736 inotify_max_user_instances)) {
737 ret = -EMFILE;
738 goto out_free_uid;
739 }
740
741 /* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */ 736 /* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */
742 group = inotify_new_group(user, inotify_max_queued_events); 737 group = inotify_new_group(inotify_max_queued_events);
743 if (IS_ERR(group)) { 738 if (IS_ERR(group))
744 ret = PTR_ERR(group); 739 return PTR_ERR(group);
745 goto out_free_uid;
746 }
747
748 atomic_inc(&user->inotify_devs);
749 740
750 ret = anon_inode_getfd("inotify", &inotify_fops, group, 741 ret = anon_inode_getfd("inotify", &inotify_fops, group,
751 O_RDONLY | flags); 742 O_RDONLY | flags);
752 if (ret >= 0) 743 if (ret < 0)
753 return ret; 744 fsnotify_put_group(group);
754 745
755 fsnotify_put_group(group);
756 atomic_dec(&user->inotify_devs);
757out_free_uid:
758 free_uid(user);
759 return ret; 746 return ret;
760} 747}
761 748
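Two things happen in this hunk: the per-user instance charge moves into inotify_new_group(), so any fsnotify_put_group() (via inotify_free_group_priv above) automatically undoes it, and the limit check becomes race-free. The old atomic_read()-then-increment left a window where two tasks could both pass the check; the new shape is the usual increment-then-test pattern, sketched generically (names illustrative):

	static int charge_instance(atomic_t *count, int limit)
	{
		if (atomic_inc_return(count) > limit) {
			atomic_dec(count);	/* back out the provisional charge */
			return -EMFILE;
		}
		return 0;
	}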
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 325185e514bb..252ab1f6452b 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -24,7 +24,7 @@
24 * referencing this object. The object typically will live inside the kernel 24 * referencing this object. The object typically will live inside the kernel
25 * with a refcnt of 2, one for each list it is on (i_list, g_list). Any task 25 * with a refcnt of 2, one for each list it is on (i_list, g_list). Any task
26 * which can find this object holding the appropriate locks, can take a reference 26 * which can find this object holding the appropriate locks, can take a reference
27 * and the object itself is guarenteed to survive until the reference is dropped. 27 * and the object itself is guaranteed to survive until the reference is dropped.
28 * 28 *
29 * LOCKING: 29 * LOCKING:
30 * There are 3 spinlocks involved with fsnotify inode marks and they MUST 30 * There are 3 spinlocks involved with fsnotify inode marks and they MUST
@@ -91,7 +91,6 @@
91#include <linux/slab.h> 91#include <linux/slab.h>
92#include <linux/spinlock.h> 92#include <linux/spinlock.h>
93#include <linux/srcu.h> 93#include <linux/srcu.h>
94#include <linux/writeback.h> /* for inode_lock */
95 94
96#include <asm/atomic.h> 95#include <asm/atomic.h>
97 96
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index 85eebff6d0d7..e86577d6c5c3 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -23,7 +23,6 @@
23#include <linux/mount.h> 23#include <linux/mount.h>
24#include <linux/mutex.h> 24#include <linux/mutex.h>
25#include <linux/spinlock.h> 25#include <linux/spinlock.h>
26#include <linux/writeback.h> /* for inode_lock */
27 26
28#include <asm/atomic.h> 27#include <asm/atomic.h>
29 28
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index 4ff028fcfd6e..30206b238433 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -2,18 +2,13 @@
2 2
3obj-$(CONFIG_NTFS_FS) += ntfs.o 3obj-$(CONFIG_NTFS_FS) += ntfs.o
4 4
5ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \ 5ntfs-y := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
6 index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \ 6 index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \
7 unistr.o upcase.o 7 unistr.o upcase.o
8 8
9EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.30\" 9ntfs-$(CONFIG_NTFS_RW) += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o
10 10
11ifeq ($(CONFIG_NTFS_DEBUG),y) 11ccflags-y := -DNTFS_VERSION=\"2.1.30\"
12EXTRA_CFLAGS += -DDEBUG 12ccflags-$(CONFIG_NTFS_DEBUG) += -DDEBUG
13endif 13ccflags-$(CONFIG_NTFS_RW) += -DNTFS_RW
14 14
15ifeq ($(CONFIG_NTFS_RW),y)
16EXTRA_CFLAGS += -DNTFS_RW
17
18ntfs-objs += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o
19endif
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index c3c2c7ac9020..0b1e885b8cf8 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1543,8 +1543,6 @@ err_out:
1543 */ 1543 */
1544const struct address_space_operations ntfs_aops = { 1544const struct address_space_operations ntfs_aops = {
1545 .readpage = ntfs_readpage, /* Fill page with data. */ 1545 .readpage = ntfs_readpage, /* Fill page with data. */
1546 .sync_page = block_sync_page, /* Currently, just unplugs the
1547 disk request queue. */
1548#ifdef NTFS_RW 1546#ifdef NTFS_RW
1549 .writepage = ntfs_writepage, /* Write dirty page to disk. */ 1547 .writepage = ntfs_writepage, /* Write dirty page to disk. */
1550#endif /* NTFS_RW */ 1548#endif /* NTFS_RW */
@@ -1560,8 +1558,6 @@ const struct address_space_operations ntfs_aops = {
1560 */ 1558 */
1561const struct address_space_operations ntfs_mst_aops = { 1559const struct address_space_operations ntfs_mst_aops = {
1562 .readpage = ntfs_readpage, /* Fill page with data. */ 1560 .readpage = ntfs_readpage, /* Fill page with data. */
1563 .sync_page = block_sync_page, /* Currently, just unplugs the
1564 disk request queue. */
1565#ifdef NTFS_RW 1561#ifdef NTFS_RW
1566 .writepage = ntfs_writepage, /* Write dirty page to disk. */ 1562 .writepage = ntfs_writepage, /* Write dirty page to disk. */
1567 .set_page_dirty = __set_page_dirty_nobuffers, /* Set the page dirty 1563 .set_page_dirty = __set_page_dirty_nobuffers, /* Set the page dirty
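The ->sync_page removals in this series track the block layer's switch to on-stack plugging in 2.6.39: instead of block_sync_page() unplugging a queue on demand, submitters batch requests explicitly. A hedged sketch of the replacement API:

	struct blk_plug plug;

	blk_start_plug(&plug);
	/* ... submit_bh()/bio submissions are batched on the stack ... */
	blk_finish_plug(&plug);		/* flush the batch to the driver */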
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index f5094ee224c1..f14fde2b03d6 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -197,7 +197,7 @@ err_out:
197 } else if (ctx_needs_reset) { 197 } else if (ctx_needs_reset) {
198 /* 198 /*
199 * If there is no attribute list, restoring the search context 199 * If there is no attribute list, restoring the search context
200 * is acomplished simply by copying the saved context back over 200 * is accomplished simply by copying the saved context back over
201 * the caller supplied context. If there is an attribute list, 201 * the caller supplied context. If there is an attribute list,
202 * things are more complicated as we need to deal with mapping 202 * things are more complicated as we need to deal with mapping
203 * of mft records and resulting potential changes in pointers. 203 * of mft records and resulting potential changes in pointers.
@@ -1181,7 +1181,7 @@ not_found:
1181 * for, i.e. if one wants to add the attribute to the mft record this is the 1181 * for, i.e. if one wants to add the attribute to the mft record this is the
1182 * correct place to insert its attribute list entry into. 1182 * correct place to insert its attribute list entry into.
1183 * 1183 *
1184 * When -errno != -ENOENT, an error occured during the lookup. @ctx->attr is 1184 * When -errno != -ENOENT, an error occurred during the lookup. @ctx->attr is
1185 * then undefined and in particular you should not rely on it not changing. 1185 * then undefined and in particular you should not rely on it not changing.
1186 */ 1186 */
1187int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name, 1187int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name,
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index 6551c7cbad92..ee4144ce5d7c 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -501,7 +501,7 @@ int ntfs_read_compressed_block(struct page *page)
501 VCN start_vcn = (((s64)index << PAGE_CACHE_SHIFT) & ~cb_size_mask) >> 501 VCN start_vcn = (((s64)index << PAGE_CACHE_SHIFT) & ~cb_size_mask) >>
502 vol->cluster_size_bits; 502 vol->cluster_size_bits;
503 /* 503 /*
504 * The first vcn after the last wanted vcn (minumum alignment is again 504 * The first vcn after the last wanted vcn (minimum alignment is again
505 * PAGE_CACHE_SIZE). 505 * PAGE_CACHE_SIZE).
506 */ 506 */
507 VCN end_vcn = ((((s64)(index + 1UL) << PAGE_CACHE_SHIFT) + cb_size - 1) 507 VCN end_vcn = ((((s64)(index + 1UL) << PAGE_CACHE_SHIFT) + cb_size - 1)
@@ -698,8 +698,7 @@ lock_retry_remap:
698 "uptodate! Unplugging the disk queue " 698 "uptodate! Unplugging the disk queue "
699 "and rescheduling."); 699 "and rescheduling.");
700 get_bh(tbh); 700 get_bh(tbh);
701 blk_run_address_space(mapping); 701 io_schedule();
702 schedule();
703 put_bh(tbh); 702 put_bh(tbh);
704 if (unlikely(!buffer_uptodate(tbh))) 703 if (unlikely(!buffer_uptodate(tbh)))
705 goto read_err; 704 goto read_err;
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index a627ed82c0a3..c05d6dcf77a4 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -54,7 +54,7 @@
54 * 54 *
55 * Return 1 if the attributes match and 0 if not. 55 * Return 1 if the attributes match and 0 if not.
56 * 56 *
57 * NOTE: This function runs with the inode_lock spin lock held so it is not 57 * NOTE: This function runs with the inode->i_lock spin lock held so it is not
58 * allowed to sleep. 58 * allowed to sleep.
59 */ 59 */
60int ntfs_test_inode(struct inode *vi, ntfs_attr *na) 60int ntfs_test_inode(struct inode *vi, ntfs_attr *na)
@@ -98,7 +98,7 @@ int ntfs_test_inode(struct inode *vi, ntfs_attr *na)
98 * 98 *
99 * Return 0 on success and -errno on error. 99 * Return 0 on success and -errno on error.
100 * 100 *
101 * NOTE: This function runs with the inode_lock spin lock held so it is not 101 * NOTE: This function runs with the inode->i_lock spin lock held so it is not
102 * allowed to sleep. (Hence the GFP_ATOMIC allocation.) 102 * allowed to sleep. (Hence the GFP_ATOMIC allocation.)
103 */ 103 */
104static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na) 104static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na)
@@ -622,7 +622,7 @@ static int ntfs_read_locked_inode(struct inode *vi)
622 */ 622 */
623 /* Everyone gets all permissions. */ 623 /* Everyone gets all permissions. */
624 vi->i_mode |= S_IRWXUGO; 624 vi->i_mode |= S_IRWXUGO;
625 /* If read-only, noone gets write permissions. */ 625 /* If read-only, no one gets write permissions. */
626 if (IS_RDONLY(vi)) 626 if (IS_RDONLY(vi))
627 vi->i_mode &= ~S_IWUGO; 627 vi->i_mode &= ~S_IWUGO;
628 if (m->flags & MFT_RECORD_IS_DIRECTORY) { 628 if (m->flags & MFT_RECORD_IS_DIRECTORY) {
@@ -2529,7 +2529,7 @@ retry_truncate:
2529 * specifies that the behaviour is unspecified thus we do not 2529 * specifies that the behaviour is unspecified thus we do not
2530 * have to do anything. This means that in our implementation 2530 * have to do anything. This means that in our implementation
2531 * in the rare case that the file is mmap()ped and a write 2531 * in the rare case that the file is mmap()ped and a write
2532 * occured into the mmap()ped region just beyond the file size 2532 * occurred into the mmap()ped region just beyond the file size
2533 * and writepage has not yet been called to write out the page 2533 * and writepage has not yet been called to write out the page
2534 * (which would clear the area beyond the file size) and we now 2534 * (which would clear the area beyond the file size) and we now
2535 * extend the file size to incorporate this dirty region 2535 * extend the file size to incorporate this dirty region
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index 8b2549f672bf..faece7190866 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -286,7 +286,7 @@ typedef le16 MFT_RECORD_FLAGS;
286 * fragmented. Volume free space includes the empty part of the mft zone and 286 * fragmented. Volume free space includes the empty part of the mft zone and
287 * when the volume's free 88% are used up, the mft zone is shrunk by a factor 287 * when the volume's free 88% are used up, the mft zone is shrunk by a factor
288 * of 2, thus making more space available for more files/data. This process is 288 * of 2, thus making more space available for more files/data. This process is
289 * repeated everytime there is no more free space except for the mft zone until 289 * repeated every time there is no more free space except for the mft zone until
290 * there really is no more free space. 290 * there really is no more free space.
291 */ 291 */
292 292
@@ -1657,13 +1657,13 @@ typedef enum {
1657 * pointed to by the Owner field was provided by a defaulting mechanism 1657 * pointed to by the Owner field was provided by a defaulting mechanism
1658 * rather than explicitly provided by the original provider of the 1658 * rather than explicitly provided by the original provider of the
1659 * security descriptor. This may affect the treatment of the SID with 1659 * security descriptor. This may affect the treatment of the SID with
1660 * respect to inheritence of an owner. 1660 * respect to inheritance of an owner.
1661 * 1661 *
1662 * SE_GROUP_DEFAULTED - This boolean flag, when set, indicates that the SID in 1662 * SE_GROUP_DEFAULTED - This boolean flag, when set, indicates that the SID in
1663 * the Group field was provided by a defaulting mechanism rather than 1663 * the Group field was provided by a defaulting mechanism rather than
1664 * explicitly provided by the original provider of the security 1664 * explicitly provided by the original provider of the security
1665 * descriptor. This may affect the treatment of the SID with respect to 1665 * descriptor. This may affect the treatment of the SID with respect to
1666 * inheritence of a primary group. 1666 * inheritance of a primary group.
1667 * 1667 *
1668 * SE_DACL_PRESENT - This boolean flag, when set, indicates that the security 1668 * SE_DACL_PRESENT - This boolean flag, when set, indicates that the security
1669 * descriptor contains a discretionary ACL. If this flag is set and the 1669 * descriptor contains a discretionary ACL. If this flag is set and the
@@ -1674,7 +1674,7 @@ typedef enum {
1674 * pointed to by the Dacl field was provided by a defaulting mechanism 1674 * pointed to by the Dacl field was provided by a defaulting mechanism
1675 * rather than explicitly provided by the original provider of the 1675 * rather than explicitly provided by the original provider of the
1676 * security descriptor. This may affect the treatment of the ACL with 1676 * security descriptor. This may affect the treatment of the ACL with
1677 * respect to inheritence of an ACL. This flag is ignored if the 1677 * respect to inheritance of an ACL. This flag is ignored if the
1678 * DaclPresent flag is not set. 1678 * DaclPresent flag is not set.
1679 * 1679 *
1680 * SE_SACL_PRESENT - This boolean flag, when set, indicates that the security 1680 * SE_SACL_PRESENT - This boolean flag, when set, indicates that the security
@@ -1686,7 +1686,7 @@ typedef enum {
1686 * pointed to by the Sacl field was provided by a defaulting mechanism 1686 * pointed to by the Sacl field was provided by a defaulting mechanism
1687 * rather than explicitly provided by the original provider of the 1687 * rather than explicitly provided by the original provider of the
1688 * security descriptor. This may affect the treatment of the ACL with 1688 * security descriptor. This may affect the treatment of the ACL with
1689 * respect to inheritence of an ACL. This flag is ignored if the 1689 * respect to inheritance of an ACL. This flag is ignored if the
1690 * SaclPresent flag is not set. 1690 * SaclPresent flag is not set.
1691 * 1691 *
1692 * SE_SELF_RELATIVE - This boolean flag, when set, indicates that the security 1692 * SE_SELF_RELATIVE - This boolean flag, when set, indicates that the security
@@ -2283,7 +2283,7 @@ typedef struct {
2283 // the key_length is zero, then the vcn immediately 2283 // the key_length is zero, then the vcn immediately
2284 // follows the INDEX_ENTRY_HEADER. Regardless of 2284 // follows the INDEX_ENTRY_HEADER. Regardless of
2285 // key_length, the address of the 8-byte boundary 2285 // key_length, the address of the 8-byte boundary
2286 // alligned vcn of INDEX_ENTRY{_HEADER} *ie is given by 2286 // aligned vcn of INDEX_ENTRY{_HEADER} *ie is given by
2287 // (char*)ie + le16_to_cpu(ie*)->length) - sizeof(VCN), 2287 // (char*)ie + le16_to_cpu(ie*)->length) - sizeof(VCN),
2288 // where sizeof(VCN) can be hardcoded as 8 if wanted. */ 2288 // where sizeof(VCN) can be hardcoded as 8 if wanted. */
2289} __attribute__ ((__packed__)) INDEX_ENTRY; 2289} __attribute__ ((__packed__)) INDEX_ENTRY;
diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c
index 4dadcdf3d451..c71de292c5ad 100644
--- a/fs/ntfs/logfile.c
+++ b/fs/ntfs/logfile.c
@@ -669,7 +669,7 @@ err_out:
669 * of cases where we think that a volume is dirty when in fact it is clean. 669 * of cases where we think that a volume is dirty when in fact it is clean.
670 * This should only affect volumes that have not been shutdown cleanly but did 670 * This should only affect volumes that have not been shutdown cleanly but did
671 * not have any pending, non-check-pointed i/o, i.e. they were completely idle 671 * not have any pending, non-check-pointed i/o, i.e. they were completely idle
672 * at least for the five seconds preceeding the unclean shutdown. 672 * at least for the five seconds preceding the unclean shutdown.
673 * 673 *
674 * This function assumes that the $LogFile journal has already been consistency 674 * This function assumes that the $LogFile journal has already been consistency
675 * checked by a call to ntfs_check_logfile() and in particular if the $LogFile 675 * checked by a call to ntfs_check_logfile() and in particular if the $LogFile
diff --git a/fs/ntfs/logfile.h b/fs/ntfs/logfile.h
index b5a6f08bd35c..aa2b6ac3f0a4 100644
--- a/fs/ntfs/logfile.h
+++ b/fs/ntfs/logfile.h
@@ -222,7 +222,7 @@ typedef struct {
222/* 24*/ sle64 file_size; /* Usable byte size of the log file. If the 222/* 24*/ sle64 file_size; /* Usable byte size of the log file. If the
223 restart_area_offset + the offset of the 223 restart_area_offset + the offset of the
224 file_size are > 510 then corruption has 224 file_size are > 510 then corruption has
225 occured. This is the very first check when 225 occurred. This is the very first check when
226 starting with the restart_area as if it 226 starting with the restart_area as if it
227 fails it means that some of the above values 227 fails it means that some of the above values
228 will be corrupted by the multi sector 228 will be corrupted by the multi sector
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 326e7475a22a..382857f9c7db 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -73,7 +73,7 @@ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni)
73 if (index > end_index || (i_size & ~PAGE_CACHE_MASK) < ofs + 73 if (index > end_index || (i_size & ~PAGE_CACHE_MASK) < ofs +
74 vol->mft_record_size) { 74 vol->mft_record_size) {
75 page = ERR_PTR(-ENOENT); 75 page = ERR_PTR(-ENOENT);
76 ntfs_error(vol->sb, "Attemt to read mft record 0x%lx, " 76 ntfs_error(vol->sb, "Attempt to read mft record 0x%lx, "
77 "which is beyond the end of the mft. " 77 "which is beyond the end of the mft. "
78 "This is probably a bug in the ntfs " 78 "This is probably a bug in the ntfs "
79 "driver.", ni->mft_no); 79 "driver.", ni->mft_no);
@@ -1442,7 +1442,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
1442 // Note: It will need to be a special mft record and if none of 1442 // Note: It will need to be a special mft record and if none of
1443 // those are available it gets rather complicated... 1443 // those are available it gets rather complicated...
1444 ntfs_error(vol->sb, "Not enough space in this mft record to " 1444 ntfs_error(vol->sb, "Not enough space in this mft record to "
1445 "accomodate extended mft bitmap attribute " 1445 "accommodate extended mft bitmap attribute "
1446 "extent. Cannot handle this yet."); 1446 "extent. Cannot handle this yet.");
1447 ret = -EOPNOTSUPP; 1447 ret = -EOPNOTSUPP;
1448 goto undo_alloc; 1448 goto undo_alloc;
@@ -1879,7 +1879,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
1879 // and we would then need to update all references to this mft 1879 // and we would then need to update all references to this mft
1880 // record appropriately. This is rather complicated... 1880 // record appropriately. This is rather complicated...
1881 ntfs_error(vol->sb, "Not enough space in this mft record to " 1881 ntfs_error(vol->sb, "Not enough space in this mft record to "
1882 "accomodate extended mft data attribute " 1882 "accommodate extended mft data attribute "
1883 "extent. Cannot handle this yet."); 1883 "extent. Cannot handle this yet.");
1884 ret = -EOPNOTSUPP; 1884 ret = -EOPNOTSUPP;
1885 goto undo_alloc; 1885 goto undo_alloc;
@@ -2357,7 +2357,7 @@ ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode,
2357 } 2357 }
2358#ifdef DEBUG 2358#ifdef DEBUG
2359 read_lock_irqsave(&mftbmp_ni->size_lock, flags); 2359 read_lock_irqsave(&mftbmp_ni->size_lock, flags);
2360 ntfs_debug("Status of mftbmp after initialized extention: " 2360 ntfs_debug("Status of mftbmp after initialized extension: "
2361 "allocated_size 0x%llx, data_size 0x%llx, " 2361 "allocated_size 0x%llx, data_size 0x%llx, "
2362 "initialized_size 0x%llx.", 2362 "initialized_size 0x%llx.",
2363 (long long)mftbmp_ni->allocated_size, 2363 (long long)mftbmp_ni->allocated_size,
diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c
index 56a9a6d25a2a..eac7d6788a10 100644
--- a/fs/ntfs/runlist.c
+++ b/fs/ntfs/runlist.c
@@ -1243,7 +1243,7 @@ err_out:
1243 * write. 1243 * write.
1244 * 1244 *
1245 * This is used when building the mapping pairs array of a runlist to compress 1245 * This is used when building the mapping pairs array of a runlist to compress
1246 * a given logical cluster number (lcn) or a specific run length to the minumum 1246 * a given logical cluster number (lcn) or a specific run length to the minimum
1247 * size possible. 1247 * size possible.
1248 * 1248 *
1249 * Return the number of bytes written on success. On error, i.e. the 1249 * Return the number of bytes written on success. On error, i.e. the
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 29099a07b9fe..b52706da4645 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -458,7 +458,7 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
458 * the volume on boot and updates them. 458 * the volume on boot and updates them.
459 * 459 *
460 * When remounting read-only, mark the volume clean if no volume errors 460 * When remounting read-only, mark the volume clean if no volume errors
461 * have occured. 461 * have occurred.
462 */ 462 */
463 if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { 463 if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
464 static const char *es = ". Cannot remount read-write."; 464 static const char *es = ". Cannot remount read-write.";
@@ -1269,7 +1269,7 @@ static int check_windows_hibernation_status(ntfs_volume *vol)
1269 "hibernated on the volume."); 1269 "hibernated on the volume.");
1270 return 0; 1270 return 0;
1271 } 1271 }
1272 /* A real error occured. */ 1272 /* A real error occurred. */
1273 ntfs_error(vol->sb, "Failed to find inode number for " 1273 ntfs_error(vol->sb, "Failed to find inode number for "
1274 "hiberfil.sys."); 1274 "hiberfil.sys.");
1275 return ret; 1275 return ret;
@@ -1370,7 +1370,7 @@ static bool load_and_init_quota(ntfs_volume *vol)
1370 NVolSetQuotaOutOfDate(vol); 1370 NVolSetQuotaOutOfDate(vol);
1371 return true; 1371 return true;
1372 } 1372 }
1373 /* A real error occured. */ 1373 /* A real error occurred. */
1374 ntfs_error(vol->sb, "Failed to find inode number for $Quota."); 1374 ntfs_error(vol->sb, "Failed to find inode number for $Quota.");
1375 return false; 1375 return false;
1376 } 1376 }
@@ -1454,7 +1454,7 @@ not_enabled:
1454 NVolSetUsnJrnlStamped(vol); 1454 NVolSetUsnJrnlStamped(vol);
1455 return true; 1455 return true;
1456 } 1456 }
1457 /* A real error occured. */ 1457 /* A real error occurred. */
1458 ntfs_error(vol->sb, "Failed to find inode number for " 1458 ntfs_error(vol->sb, "Failed to find inode number for "
1459 "$UsnJrnl."); 1459 "$UsnJrnl.");
1460 return false; 1460 return false;
@@ -2292,7 +2292,7 @@ static void ntfs_put_super(struct super_block *sb)
2292 ntfs_commit_inode(vol->mft_ino); 2292 ntfs_commit_inode(vol->mft_ino);
2293 2293
2294 /* 2294 /*
2295 * If a read-write mount and no volume errors have occured, mark the 2295 * If a read-write mount and no volume errors have occurred, mark the
2296 * volume clean. Also, re-commit all affected inodes. 2296 * volume clean. Also, re-commit all affected inodes.
2297 */ 2297 */
2298 if (!(sb->s_flags & MS_RDONLY)) { 2298 if (!(sb->s_flags & MS_RDONLY)) {
@@ -2496,7 +2496,7 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
2496 if (vol->nr_clusters & 63) 2496 if (vol->nr_clusters & 63)
2497 nr_free += 64 - (vol->nr_clusters & 63); 2497 nr_free += 64 - (vol->nr_clusters & 63);
2498 up_read(&vol->lcnbmp_lock); 2498 up_read(&vol->lcnbmp_lock);
2499 /* If errors occured we may well have gone below zero, fix this. */ 2499 /* If errors occurred we may well have gone below zero, fix this. */
2500 if (nr_free < 0) 2500 if (nr_free < 0)
2501 nr_free = 0; 2501 nr_free = 0;
2502 ntfs_debug("Exiting."); 2502 ntfs_debug("Exiting.");
@@ -2561,7 +2561,7 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol,
2561 } 2561 }
2562 ntfs_debug("Finished reading $MFT/$BITMAP, last index = 0x%lx.", 2562 ntfs_debug("Finished reading $MFT/$BITMAP, last index = 0x%lx.",
2563 index - 1); 2563 index - 1);
2564 /* If errors occured we may well have gone below zero, fix this. */ 2564 /* If errors occurred we may well have gone below zero, fix this. */
2565 if (nr_free < 0) 2565 if (nr_free < 0)
2566 nr_free = 0; 2566 nr_free = 0;
2567 ntfs_debug("Exiting."); 2567 ntfs_debug("Exiting.");
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 07d9fd854350..d8a0313e99e6 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -1,6 +1,6 @@
1EXTRA_CFLAGS += -Ifs/ocfs2 1ccflags-y := -Ifs/ocfs2
2 2
3EXTRA_CFLAGS += -DCATCH_BH_JBD_RACES 3ccflags-y += -DCATCH_BH_JBD_RACES
4 4
5obj-$(CONFIG_OCFS2_FS) += \ 5obj-$(CONFIG_OCFS2_FS) += \
6 ocfs2.o \ 6 ocfs2.o \
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 704f6b1742f3..e913ad130fdd 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -24,7 +24,6 @@
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/string.h> 25#include <linux/string.h>
26 26
27#define MLOG_MASK_PREFIX ML_INODE
28#include <cluster/masklog.h> 27#include <cluster/masklog.h>
29 28
30#include "ocfs2.h" 29#include "ocfs2.h"
@@ -497,7 +496,7 @@ static int ocfs2_xattr_set_acl(struct dentry *dentry, const char *name,
497 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) 496 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
498 return -EOPNOTSUPP; 497 return -EOPNOTSUPP;
499 498
500 if (!is_owner_or_cap(inode)) 499 if (!inode_owner_or_capable(inode))
501 return -EPERM; 500 return -EPERM;
502 501
503 if (value) { 502 if (value) {
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index e4984e259cb6..48aa9c7401c7 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -30,7 +30,6 @@
30#include <linux/swap.h> 30#include <linux/swap.h>
31#include <linux/quotaops.h> 31#include <linux/quotaops.h>
32 32
33#define MLOG_MASK_PREFIX ML_DISK_ALLOC
34#include <cluster/masklog.h> 33#include <cluster/masklog.h>
35 34
36#include "ocfs2.h" 35#include "ocfs2.h"
@@ -50,6 +49,7 @@
50#include "uptodate.h" 49#include "uptodate.h"
51#include "xattr.h" 50#include "xattr.h"
52#include "refcounttree.h" 51#include "refcounttree.h"
52#include "ocfs2_trace.h"
53 53
54#include "buffer_head_io.h" 54#include "buffer_head_io.h"
55 55
@@ -886,8 +886,7 @@ static int ocfs2_validate_extent_block(struct super_block *sb,
886 struct ocfs2_extent_block *eb = 886 struct ocfs2_extent_block *eb =
887 (struct ocfs2_extent_block *)bh->b_data; 887 (struct ocfs2_extent_block *)bh->b_data;
888 888
889 mlog(0, "Validating extent block %llu\n", 889 trace_ocfs2_validate_extent_block((unsigned long long)bh->b_blocknr);
890 (unsigned long long)bh->b_blocknr);
891 890
892 BUG_ON(!buffer_uptodate(bh)); 891 BUG_ON(!buffer_uptodate(bh));
893 892
@@ -965,8 +964,6 @@ int ocfs2_num_free_extents(struct ocfs2_super *osb,
965 struct buffer_head *eb_bh = NULL; 964 struct buffer_head *eb_bh = NULL;
966 u64 last_eb_blk = 0; 965 u64 last_eb_blk = 0;
967 966
968 mlog_entry_void();
969
970 el = et->et_root_el; 967 el = et->et_root_el;
971 last_eb_blk = ocfs2_et_get_last_eb_blk(et); 968 last_eb_blk = ocfs2_et_get_last_eb_blk(et);
972 969
@@ -987,7 +984,7 @@ int ocfs2_num_free_extents(struct ocfs2_super *osb,
987bail: 984bail:
988 brelse(eb_bh); 985 brelse(eb_bh);
989 986
990 mlog_exit(retval); 987 trace_ocfs2_num_free_extents(retval);
991 return retval; 988 return retval;
992} 989}
993 990
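These hunks swap ad-hoc mlog(0, ...) prints for static tracepoints defined in the new fs/ocfs2/ocfs2_trace.h included above. A hedged sketch of what such a definition looks like; the event and field here are illustrative, not the actual ocfs2 definition, which is built from that file's event-class helper macros:

	TRACE_EVENT(ocfs2_num_free_extents,
		TP_PROTO(int num_free),
		TP_ARGS(num_free),
		TP_STRUCT__entry(
			__field(int, num_free)
		),
		TP_fast_assign(
			__entry->num_free = num_free;
		),
		TP_printk("%d", __entry->num_free)
	);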
@@ -1010,8 +1007,6 @@ static int ocfs2_create_new_meta_bhs(handle_t *handle,
1010 OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci)); 1007 OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci));
1011 struct ocfs2_extent_block *eb; 1008 struct ocfs2_extent_block *eb;
1012 1009
1013 mlog_entry_void();
1014
1015 count = 0; 1010 count = 0;
1016 while (count < wanted) { 1011 while (count < wanted) {
1017 status = ocfs2_claim_metadata(handle, 1012 status = ocfs2_claim_metadata(handle,
@@ -1074,8 +1069,8 @@ bail:
1074 brelse(bhs[i]); 1069 brelse(bhs[i]);
1075 bhs[i] = NULL; 1070 bhs[i] = NULL;
1076 } 1071 }
1072 mlog_errno(status);
1077 } 1073 }
1078 mlog_exit(status);
1079 return status; 1074 return status;
1080} 1075}
1081 1076
@@ -1173,8 +1168,6 @@ static int ocfs2_add_branch(handle_t *handle,
1173 struct ocfs2_extent_list *el; 1168 struct ocfs2_extent_list *el;
1174 u32 new_cpos, root_end; 1169 u32 new_cpos, root_end;
1175 1170
1176 mlog_entry_void();
1177
1178 BUG_ON(!last_eb_bh || !*last_eb_bh); 1171 BUG_ON(!last_eb_bh || !*last_eb_bh);
1179 1172
1180 if (eb_bh) { 1173 if (eb_bh) {
@@ -1200,8 +1193,11 @@ static int ocfs2_add_branch(handle_t *handle,
1200 * from new_cpos). 1193 * from new_cpos).
1201 */ 1194 */
1202 if (root_end > new_cpos) { 1195 if (root_end > new_cpos) {
1203 mlog(0, "adjust the cluster end from %u to %u\n", 1196 trace_ocfs2_adjust_rightmost_branch(
1204 root_end, new_cpos); 1197 (unsigned long long)
1198 ocfs2_metadata_cache_owner(et->et_ci),
1199 root_end, new_cpos);
1200
1205 status = ocfs2_adjust_rightmost_branch(handle, et); 1201 status = ocfs2_adjust_rightmost_branch(handle, et);
1206 if (status) { 1202 if (status) {
1207 mlog_errno(status); 1203 mlog_errno(status);
@@ -1332,7 +1328,6 @@ bail:
1332 kfree(new_eb_bhs); 1328 kfree(new_eb_bhs);
1333 } 1329 }
1334 1330
1335 mlog_exit(status);
1336 return status; 1331 return status;
1337} 1332}
1338 1333
@@ -1353,8 +1348,6 @@ static int ocfs2_shift_tree_depth(handle_t *handle,
1353 struct ocfs2_extent_list *root_el; 1348 struct ocfs2_extent_list *root_el;
1354 struct ocfs2_extent_list *eb_el; 1349 struct ocfs2_extent_list *eb_el;
1355 1350
1356 mlog_entry_void();
1357
1358 status = ocfs2_create_new_meta_bhs(handle, et, 1, meta_ac, 1351 status = ocfs2_create_new_meta_bhs(handle, et, 1, meta_ac,
1359 &new_eb_bh); 1352 &new_eb_bh);
1360 if (status < 0) { 1353 if (status < 0) {
@@ -1415,7 +1408,6 @@ static int ocfs2_shift_tree_depth(handle_t *handle,
1415bail: 1408bail:
1416 brelse(new_eb_bh); 1409 brelse(new_eb_bh);
1417 1410
1418 mlog_exit(status);
1419 return status; 1411 return status;
1420} 1412}
1421 1413
@@ -1446,8 +1438,6 @@ static int ocfs2_find_branch_target(struct ocfs2_extent_tree *et,
1446 struct buffer_head *bh = NULL; 1438 struct buffer_head *bh = NULL;
1447 struct buffer_head *lowest_bh = NULL; 1439 struct buffer_head *lowest_bh = NULL;
1448 1440
1449 mlog_entry_void();
1450
1451 *target_bh = NULL; 1441 *target_bh = NULL;
1452 1442
1453 el = et->et_root_el; 1443 el = et->et_root_el;
@@ -1503,7 +1493,6 @@ static int ocfs2_find_branch_target(struct ocfs2_extent_tree *et,
1503bail: 1493bail:
1504 brelse(bh); 1494 brelse(bh);
1505 1495
1506 mlog_exit(status);
1507 return status; 1496 return status;
1508} 1497}
1509 1498
@@ -1540,7 +1529,10 @@ static int ocfs2_grow_tree(handle_t *handle, struct ocfs2_extent_tree *et,
1540 * another tree level */ 1529 * another tree level */
1541 if (shift) { 1530 if (shift) {
1542 BUG_ON(bh); 1531 BUG_ON(bh);
1543 mlog(0, "need to shift tree depth (current = %d)\n", depth); 1532 trace_ocfs2_grow_tree(
1533 (unsigned long long)
1534 ocfs2_metadata_cache_owner(et->et_ci),
1535 depth);
1544 1536
1545 /* ocfs2_shift_tree_depth will return us a buffer with 1537 /* ocfs2_shift_tree_depth will return us a buffer with
1546 * the new extent block (so we can pass that to 1538 * the new extent block (so we can pass that to
@@ -1570,7 +1562,6 @@ static int ocfs2_grow_tree(handle_t *handle, struct ocfs2_extent_tree *et,
1570 1562
1571 /* call ocfs2_add_branch to add the final part of the tree with 1563 /* call ocfs2_add_branch to add the final part of the tree with
1572 * the new data. */ 1564 * the new data. */
1573 mlog(0, "add branch. bh = %p\n", bh);
1574 ret = ocfs2_add_branch(handle, et, bh, last_eb_bh, 1565 ret = ocfs2_add_branch(handle, et, bh, last_eb_bh,
1575 meta_ac); 1566 meta_ac);
1576 if (ret < 0) { 1567 if (ret < 0) {
@@ -1645,8 +1636,9 @@ static void ocfs2_rotate_leaf(struct ocfs2_extent_list *el,
1645 } 1636 }
1646 insert_index = i; 1637 insert_index = i;
1647 1638
1648 mlog(0, "ins %u: index %d, has_empty %d, next_free %d, count %d\n", 1639 trace_ocfs2_rotate_leaf(insert_cpos, insert_index,
1649 insert_cpos, insert_index, has_empty, next_free, le16_to_cpu(el->l_count)); 1640 has_empty, next_free,
1641 le16_to_cpu(el->l_count));
1650 1642
1651 BUG_ON(insert_index < 0); 1643 BUG_ON(insert_index < 0);
1652 BUG_ON(insert_index >= le16_to_cpu(el->l_count)); 1644 BUG_ON(insert_index >= le16_to_cpu(el->l_count));
@@ -2059,7 +2051,7 @@ static void ocfs2_complete_edge_insert(handle_t *handle,
2059 left_el = path_leaf_el(left_path); 2051 left_el = path_leaf_el(left_path);
2060 right_el = path_leaf_el(right_path); 2052 right_el = path_leaf_el(right_path);
2061 for(i = left_path->p_tree_depth - 1; i > subtree_index; i--) { 2053 for(i = left_path->p_tree_depth - 1; i > subtree_index; i--) {
2062 mlog(0, "Adjust records at index %u\n", i); 2054 trace_ocfs2_complete_edge_insert(i);
2063 2055
2064 /* 2056 /*
2065 * One nice property of knowing that all of these 2057 * One nice property of knowing that all of these
@@ -2389,7 +2381,9 @@ static int ocfs2_rotate_tree_right(handle_t *handle,
2389 goto out; 2381 goto out;
2390 } 2382 }
2391 2383
2392 mlog(0, "Insert: %u, first left path cpos: %u\n", insert_cpos, cpos); 2384 trace_ocfs2_rotate_tree_right(
2385 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
2386 insert_cpos, cpos);
2393 2387
2394 /* 2388 /*
2395 * What we want to do here is: 2389 * What we want to do here is:
@@ -2418,8 +2412,10 @@ static int ocfs2_rotate_tree_right(handle_t *handle,
2418 * rotating subtrees. 2412 * rotating subtrees.
2419 */ 2413 */
2420 while (cpos && insert_cpos <= cpos) { 2414 while (cpos && insert_cpos <= cpos) {
2421 mlog(0, "Rotating a tree: ins. cpos: %u, left path cpos: %u\n", 2415 trace_ocfs2_rotate_tree_right(
2422 insert_cpos, cpos); 2416 (unsigned long long)
2417 ocfs2_metadata_cache_owner(et->et_ci),
2418 insert_cpos, cpos);
2423 2419
2424 ret = ocfs2_find_path(et->et_ci, left_path, cpos); 2420 ret = ocfs2_find_path(et->et_ci, left_path, cpos);
2425 if (ret) { 2421 if (ret) {
@@ -2461,10 +2457,10 @@ static int ocfs2_rotate_tree_right(handle_t *handle,
2461 2457
2462 start = ocfs2_find_subtree_root(et, left_path, right_path); 2458 start = ocfs2_find_subtree_root(et, left_path, right_path);
2463 2459
2464 mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n", 2460 trace_ocfs2_rotate_subtree(start,
2465 start, 2461 (unsigned long long)
2466 (unsigned long long) right_path->p_node[start].bh->b_blocknr, 2462 right_path->p_node[start].bh->b_blocknr,
2467 right_path->p_tree_depth); 2463 right_path->p_tree_depth);
2468 2464
2469 ret = ocfs2_extend_rotate_transaction(handle, start, 2465 ret = ocfs2_extend_rotate_transaction(handle, start,
2470 orig_credits, right_path); 2466 orig_credits, right_path);
@@ -2964,8 +2960,7 @@ static int __ocfs2_rotate_tree_left(handle_t *handle,
2964 subtree_root = ocfs2_find_subtree_root(et, left_path, 2960 subtree_root = ocfs2_find_subtree_root(et, left_path,
2965 right_path); 2961 right_path);
2966 2962
2967 mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n", 2963 trace_ocfs2_rotate_subtree(subtree_root,
2968 subtree_root,
2969 (unsigned long long) 2964 (unsigned long long)
2970 right_path->p_node[subtree_root].bh->b_blocknr, 2965 right_path->p_node[subtree_root].bh->b_blocknr,
2971 right_path->p_tree_depth); 2966 right_path->p_tree_depth);
@@ -3989,9 +3984,11 @@ static int ocfs2_append_rec_to_path(handle_t *handle,
3989 goto out; 3984 goto out;
3990 } 3985 }
3991 3986
3992 mlog(0, "Append may need a left path update. cpos: %u, " 3987 trace_ocfs2_append_rec_to_path(
3993 "left_cpos: %u\n", le32_to_cpu(insert_rec->e_cpos), 3988 (unsigned long long)
3994 left_cpos); 3989 ocfs2_metadata_cache_owner(et->et_ci),
3990 le32_to_cpu(insert_rec->e_cpos),
3991 left_cpos);
3995 3992
3996 /* 3993 /*
3997 * No need to worry if the append is already in the 3994 * No need to worry if the append is already in the
@@ -4522,7 +4519,7 @@ set_tail_append:
4522} 4519}
4523 4520
4524/* 4521/*
4525 * Helper function called at the begining of an insert. 4522 * Helper function called at the beginning of an insert.
4526 * 4523 *
4527 * This computes a few things that are commonly used in the process of 4524 * This computes a few things that are commonly used in the process of
4528 * inserting into the btree: 4525 * inserting into the btree:
@@ -4562,7 +4559,7 @@ static int ocfs2_figure_insert_type(struct ocfs2_extent_tree *et,
4562 ocfs2_et_get_last_eb_blk(et), 4559 ocfs2_et_get_last_eb_blk(et),
4563 &bh); 4560 &bh);
4564 if (ret) { 4561 if (ret) {
4565 mlog_exit(ret); 4562 mlog_errno(ret);
4566 goto out; 4563 goto out;
4567 } 4564 }
4568 eb = (struct ocfs2_extent_block *) bh->b_data; 4565 eb = (struct ocfs2_extent_block *) bh->b_data;
@@ -4678,9 +4675,9 @@ int ocfs2_insert_extent(handle_t *handle,
4678 struct ocfs2_insert_type insert = {0, }; 4675 struct ocfs2_insert_type insert = {0, };
4679 struct ocfs2_extent_rec rec; 4676 struct ocfs2_extent_rec rec;
4680 4677
4681 mlog(0, "add %u clusters at position %u to owner %llu\n", 4678 trace_ocfs2_insert_extent_start(
4682 new_clusters, cpos, 4679 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
4683 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci)); 4680 cpos, new_clusters);
4684 4681
4685 memset(&rec, 0, sizeof(rec)); 4682 memset(&rec, 0, sizeof(rec));
4686 rec.e_cpos = cpu_to_le32(cpos); 4683 rec.e_cpos = cpu_to_le32(cpos);
@@ -4700,11 +4697,9 @@ int ocfs2_insert_extent(handle_t *handle,
4700 goto bail; 4697 goto bail;
4701 } 4698 }
4702 4699
4703 mlog(0, "Insert.appending: %u, Insert.Contig: %u, " 4700 trace_ocfs2_insert_extent(insert.ins_appending, insert.ins_contig,
4704 "Insert.contig_index: %d, Insert.free_records: %d, " 4701 insert.ins_contig_index, free_records,
4705 "Insert.tree_depth: %d\n", 4702 insert.ins_tree_depth);
4706 insert.ins_appending, insert.ins_contig, insert.ins_contig_index,
4707 free_records, insert.ins_tree_depth);
4708 4703
4709 if (insert.ins_contig == CONTIG_NONE && free_records == 0) { 4704 if (insert.ins_contig == CONTIG_NONE && free_records == 0) {
4710 status = ocfs2_grow_tree(handle, et, 4705 status = ocfs2_grow_tree(handle, et,
@@ -4726,7 +4721,6 @@ int ocfs2_insert_extent(handle_t *handle,
4726bail: 4721bail:
4727 brelse(last_eb_bh); 4722 brelse(last_eb_bh);
4728 4723
4729 mlog_exit(status);
4730 return status; 4724 return status;
4731} 4725}
4732 4726
@@ -4746,7 +4740,7 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
4746 struct ocfs2_alloc_context *meta_ac, 4740 struct ocfs2_alloc_context *meta_ac,
4747 enum ocfs2_alloc_restarted *reason_ret) 4741 enum ocfs2_alloc_restarted *reason_ret)
4748{ 4742{
4749 int status = 0; 4743 int status = 0, err = 0;
4750 int free_extents; 4744 int free_extents;
4751 enum ocfs2_alloc_restarted reason = RESTART_NONE; 4745 enum ocfs2_alloc_restarted reason = RESTART_NONE;
4752 u32 bit_off, num_bits; 4746 u32 bit_off, num_bits;
@@ -4773,14 +4767,14 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
4773 * 2) we are so fragmented, we've needed to add metadata too 4767 * 2) we are so fragmented, we've needed to add metadata too
4774 * many times. */ 4768 * many times. */
4775 if (!free_extents && !meta_ac) { 4769 if (!free_extents && !meta_ac) {
4776 mlog(0, "we haven't reserved any metadata!\n"); 4770 err = -1;
4777 status = -EAGAIN; 4771 status = -EAGAIN;
4778 reason = RESTART_META; 4772 reason = RESTART_META;
4779 goto leave; 4773 goto leave;
4780 } else if ((!free_extents) 4774 } else if ((!free_extents)
4781 && (ocfs2_alloc_context_bits_left(meta_ac) 4775 && (ocfs2_alloc_context_bits_left(meta_ac)
4782 < ocfs2_extend_meta_needed(et->et_root_el))) { 4776 < ocfs2_extend_meta_needed(et->et_root_el))) {
4783 mlog(0, "filesystem is really fragmented...\n"); 4777 err = -2;
4784 status = -EAGAIN; 4778 status = -EAGAIN;
4785 reason = RESTART_META; 4779 reason = RESTART_META;
4786 goto leave; 4780 goto leave;
@@ -4805,9 +4799,9 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
4805 } 4799 }
4806 4800
4807 block = ocfs2_clusters_to_blocks(osb->sb, bit_off); 4801 block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
4808 mlog(0, "Allocating %u clusters at block %u for owner %llu\n", 4802 trace_ocfs2_add_clusters_in_btree(
4809 num_bits, bit_off, 4803 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
4810 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci)); 4804 bit_off, num_bits);
4811 status = ocfs2_insert_extent(handle, et, *logical_offset, block, 4805 status = ocfs2_insert_extent(handle, et, *logical_offset, block,
4812 num_bits, flags, meta_ac); 4806 num_bits, flags, meta_ac);
4813 if (status < 0) { 4807 if (status < 0) {
@@ -4821,16 +4815,15 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
4821 *logical_offset += num_bits; 4815 *logical_offset += num_bits;
4822 4816
4823 if (clusters_to_add) { 4817 if (clusters_to_add) {
4824 mlog(0, "need to alloc once more, wanted = %u\n", 4818 err = clusters_to_add;
4825 clusters_to_add);
4826 status = -EAGAIN; 4819 status = -EAGAIN;
4827 reason = RESTART_TRANS; 4820 reason = RESTART_TRANS;
4828 } 4821 }
4829 4822
4830leave: 4823leave:
4831 mlog_exit(status);
4832 if (reason_ret) 4824 if (reason_ret)
4833 *reason_ret = reason; 4825 *reason_ret = reason;
4826 trace_ocfs2_add_clusters_in_btree_ret(status, reason, err);
4834 return status; 4827 return status;
4835} 4828}
4836 4829
@@ -5039,7 +5032,7 @@ int ocfs2_split_extent(handle_t *handle,
5039 ocfs2_et_get_last_eb_blk(et), 5032 ocfs2_et_get_last_eb_blk(et),
5040 &last_eb_bh); 5033 &last_eb_bh);
5041 if (ret) { 5034 if (ret) {
5042 mlog_exit(ret); 5035 mlog_errno(ret);
5043 goto out; 5036 goto out;
5044 } 5037 }
5045 5038
@@ -5056,9 +5049,9 @@ int ocfs2_split_extent(handle_t *handle,
5056 5049
5057 ctxt.c_has_empty_extent = ocfs2_is_empty_extent(&el->l_recs[0]); 5050 ctxt.c_has_empty_extent = ocfs2_is_empty_extent(&el->l_recs[0]);
5058 5051
5059 mlog(0, "index: %d, contig: %u, has_empty: %u, split_covers: %u\n", 5052 trace_ocfs2_split_extent(split_index, ctxt.c_contig_type,
5060 split_index, ctxt.c_contig_type, ctxt.c_has_empty_extent, 5053 ctxt.c_has_empty_extent,
5061 ctxt.c_split_covers_rec); 5054 ctxt.c_split_covers_rec);
5062 5055
5063 if (ctxt.c_contig_type == CONTIG_NONE) { 5056 if (ctxt.c_contig_type == CONTIG_NONE) {
5064 if (ctxt.c_split_covers_rec) 5057 if (ctxt.c_split_covers_rec)
@@ -5192,8 +5185,9 @@ int ocfs2_mark_extent_written(struct inode *inode,
5192{ 5185{
5193 int ret; 5186 int ret;
5194 5187
5195 mlog(0, "Inode %lu cpos %u, len %u, phys clusters %u\n", 5188 trace_ocfs2_mark_extent_written(
5196 inode->i_ino, cpos, len, phys); 5189 (unsigned long long)OCFS2_I(inode)->ip_blkno,
5190 cpos, len, phys);
5197 5191
5198 if (!ocfs2_writes_unwritten_extents(OCFS2_SB(inode->i_sb))) { 5192 if (!ocfs2_writes_unwritten_extents(OCFS2_SB(inode->i_sb))) {
5199 ocfs2_error(inode->i_sb, "Inode %llu has unwritten extents " 5193 ocfs2_error(inode->i_sb, "Inode %llu has unwritten extents "
@@ -5512,11 +5506,10 @@ int ocfs2_remove_extent(handle_t *handle,
5512 5506
5513 BUG_ON(cpos < le32_to_cpu(rec->e_cpos) || trunc_range > rec_range); 5507 BUG_ON(cpos < le32_to_cpu(rec->e_cpos) || trunc_range > rec_range);
5514 5508
5515 mlog(0, "Owner %llu, remove (cpos %u, len %u). Existing index %d " 5509 trace_ocfs2_remove_extent(
5516 "(cpos %u, len %u)\n", 5510 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
5517 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), 5511 cpos, len, index, le32_to_cpu(rec->e_cpos),
5518 cpos, len, index, 5512 ocfs2_rec_clusters(el, rec));
5519 le32_to_cpu(rec->e_cpos), ocfs2_rec_clusters(el, rec));
5520 5513
5521 if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) { 5514 if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) {
5522 ret = ocfs2_truncate_rec(handle, et, path, index, dealloc, 5515 ret = ocfs2_truncate_rec(handle, et, path, index, dealloc,
@@ -5795,9 +5788,6 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
5795 struct ocfs2_dinode *di; 5788 struct ocfs2_dinode *di;
5796 struct ocfs2_truncate_log *tl; 5789 struct ocfs2_truncate_log *tl;
5797 5790
5798 mlog_entry("start_blk = %llu, num_clusters = %u\n",
5799 (unsigned long long)start_blk, num_clusters);
5800
5801 BUG_ON(mutex_trylock(&tl_inode->i_mutex)); 5791 BUG_ON(mutex_trylock(&tl_inode->i_mutex));
5802 5792
5803 start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk); 5793 start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk);
@@ -5834,10 +5824,9 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
5834 goto bail; 5824 goto bail;
5835 } 5825 }
5836 5826
5837 mlog(0, "Log truncate of %u clusters starting at cluster %u to " 5827 trace_ocfs2_truncate_log_append(
5838 "%llu (index = %d)\n", num_clusters, start_cluster, 5828 (unsigned long long)OCFS2_I(tl_inode)->ip_blkno, index,
5839 (unsigned long long)OCFS2_I(tl_inode)->ip_blkno, index); 5829 start_cluster, num_clusters);
5840
5841 if (ocfs2_truncate_log_can_coalesce(tl, start_cluster)) { 5830 if (ocfs2_truncate_log_can_coalesce(tl, start_cluster)) {
5842 /* 5831 /*
5843 * Move index back to the record we are coalescing with. 5832 * Move index back to the record we are coalescing with.
@@ -5846,9 +5835,10 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
5846 index--; 5835 index--;
5847 5836
5848 num_clusters += le32_to_cpu(tl->tl_recs[index].t_clusters); 5837 num_clusters += le32_to_cpu(tl->tl_recs[index].t_clusters);
5849 mlog(0, "Coalesce with index %u (start = %u, clusters = %u)\n", 5838 trace_ocfs2_truncate_log_append(
5850 index, le32_to_cpu(tl->tl_recs[index].t_start), 5839 (unsigned long long)OCFS2_I(tl_inode)->ip_blkno,
5851 num_clusters); 5840 index, le32_to_cpu(tl->tl_recs[index].t_start),
5841 num_clusters);
5852 } else { 5842 } else {
5853 tl->tl_recs[index].t_start = cpu_to_le32(start_cluster); 5843 tl->tl_recs[index].t_start = cpu_to_le32(start_cluster);
5854 tl->tl_used = cpu_to_le16(index + 1); 5844 tl->tl_used = cpu_to_le16(index + 1);
@@ -5859,7 +5849,6 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
5859 5849
5860 osb->truncated_clusters += num_clusters; 5850 osb->truncated_clusters += num_clusters;
5861bail: 5851bail:
5862 mlog_exit(status);
5863 return status; 5852 return status;
5864} 5853}
5865 5854
@@ -5878,8 +5867,6 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
5878 struct inode *tl_inode = osb->osb_tl_inode; 5867 struct inode *tl_inode = osb->osb_tl_inode;
5879 struct buffer_head *tl_bh = osb->osb_tl_bh; 5868 struct buffer_head *tl_bh = osb->osb_tl_bh;
5880 5869
5881 mlog_entry_void();
5882
5883 di = (struct ocfs2_dinode *) tl_bh->b_data; 5870 di = (struct ocfs2_dinode *) tl_bh->b_data;
5884 tl = &di->id2.i_dealloc; 5871 tl = &di->id2.i_dealloc;
5885 i = le16_to_cpu(tl->tl_used) - 1; 5872 i = le16_to_cpu(tl->tl_used) - 1;
@@ -5915,8 +5902,9 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
5915 /* if start_blk is not set, we ignore the record as 5902 /* if start_blk is not set, we ignore the record as
5916 * invalid. */ 5903 * invalid. */
5917 if (start_blk) { 5904 if (start_blk) {
5918 mlog(0, "free record %d, start = %u, clusters = %u\n", 5905 trace_ocfs2_replay_truncate_records(
5919 i, le32_to_cpu(rec.t_start), num_clusters); 5906 (unsigned long long)OCFS2_I(tl_inode)->ip_blkno,
5907 i, le32_to_cpu(rec.t_start), num_clusters);
5920 5908
5921 status = ocfs2_free_clusters(handle, data_alloc_inode, 5909 status = ocfs2_free_clusters(handle, data_alloc_inode,
5922 data_alloc_bh, start_blk, 5910 data_alloc_bh, start_blk,
@@ -5932,7 +5920,6 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
5932 osb->truncated_clusters = 0; 5920 osb->truncated_clusters = 0;
5933 5921
5934bail: 5922bail:
5935 mlog_exit(status);
5936 return status; 5923 return status;
5937} 5924}
5938 5925
@@ -5949,8 +5936,6 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
5949 struct ocfs2_dinode *di; 5936 struct ocfs2_dinode *di;
5950 struct ocfs2_truncate_log *tl; 5937 struct ocfs2_truncate_log *tl;
5951 5938
5952 mlog_entry_void();
5953
5954 BUG_ON(mutex_trylock(&tl_inode->i_mutex)); 5939 BUG_ON(mutex_trylock(&tl_inode->i_mutex));
5955 5940
5956 di = (struct ocfs2_dinode *) tl_bh->b_data; 5941 di = (struct ocfs2_dinode *) tl_bh->b_data;
@@ -5962,8 +5947,9 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
5962 5947
5963 tl = &di->id2.i_dealloc; 5948 tl = &di->id2.i_dealloc;
5964 num_to_flush = le16_to_cpu(tl->tl_used); 5949 num_to_flush = le16_to_cpu(tl->tl_used);
5965 mlog(0, "Flush %u records from truncate log #%llu\n", 5950 trace_ocfs2_flush_truncate_log(
5966 num_to_flush, (unsigned long long)OCFS2_I(tl_inode)->ip_blkno); 5951 (unsigned long long)OCFS2_I(tl_inode)->ip_blkno,
5952 num_to_flush);
5967 if (!num_to_flush) { 5953 if (!num_to_flush) {
5968 status = 0; 5954 status = 0;
5969 goto out; 5955 goto out;
@@ -6009,7 +5995,6 @@ out_mutex:
6009 iput(data_alloc_inode); 5995 iput(data_alloc_inode);
6010 5996
6011out: 5997out:
6012 mlog_exit(status);
6013 return status; 5998 return status;
6014} 5999}
6015 6000
@@ -6032,15 +6017,11 @@ static void ocfs2_truncate_log_worker(struct work_struct *work)
6032 container_of(work, struct ocfs2_super, 6017 container_of(work, struct ocfs2_super,
6033 osb_truncate_log_wq.work); 6018 osb_truncate_log_wq.work);
6034 6019
6035 mlog_entry_void();
6036
6037 status = ocfs2_flush_truncate_log(osb); 6020 status = ocfs2_flush_truncate_log(osb);
6038 if (status < 0) 6021 if (status < 0)
6039 mlog_errno(status); 6022 mlog_errno(status);
6040 else 6023 else
6041 ocfs2_init_steal_slots(osb); 6024 ocfs2_init_steal_slots(osb);
6042
6043 mlog_exit(status);
6044} 6025}
6045 6026
6046#define OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL (2 * HZ) 6027#define OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL (2 * HZ)
@@ -6086,7 +6067,6 @@ static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb,
6086 *tl_inode = inode; 6067 *tl_inode = inode;
6087 *tl_bh = bh; 6068 *tl_bh = bh;
6088bail: 6069bail:
6089 mlog_exit(status);
6090 return status; 6070 return status;
6091} 6071}
6092 6072
@@ -6106,7 +6086,7 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
6106 6086
6107 *tl_copy = NULL; 6087 *tl_copy = NULL;
6108 6088
6109 mlog(0, "recover truncate log from slot %d\n", slot_num); 6089 trace_ocfs2_begin_truncate_log_recovery(slot_num);
6110 6090
6111 status = ocfs2_get_truncate_log_info(osb, slot_num, &tl_inode, &tl_bh); 6091 status = ocfs2_get_truncate_log_info(osb, slot_num, &tl_inode, &tl_bh);
6112 if (status < 0) { 6092 if (status < 0) {
@@ -6123,8 +6103,7 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
6123 6103
6124 tl = &di->id2.i_dealloc; 6104 tl = &di->id2.i_dealloc;
6125 if (le16_to_cpu(tl->tl_used)) { 6105 if (le16_to_cpu(tl->tl_used)) {
6126 mlog(0, "We'll have %u logs to recover\n", 6106 trace_ocfs2_truncate_log_recovery_num(le16_to_cpu(tl->tl_used));
6127 le16_to_cpu(tl->tl_used));
6128 6107
6129 *tl_copy = kmalloc(tl_bh->b_size, GFP_KERNEL); 6108 *tl_copy = kmalloc(tl_bh->b_size, GFP_KERNEL);
6130 if (!(*tl_copy)) { 6109 if (!(*tl_copy)) {
@@ -6157,9 +6136,9 @@ bail:
6157 if (status < 0 && (*tl_copy)) { 6136 if (status < 0 && (*tl_copy)) {
6158 kfree(*tl_copy); 6137 kfree(*tl_copy);
6159 *tl_copy = NULL; 6138 *tl_copy = NULL;
6139 mlog_errno(status);
6160 } 6140 }
6161 6141
6162 mlog_exit(status);
6163 return status; 6142 return status;
6164} 6143}
6165 6144
@@ -6174,8 +6153,6 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
6174 struct inode *tl_inode = osb->osb_tl_inode; 6153 struct inode *tl_inode = osb->osb_tl_inode;
6175 struct ocfs2_truncate_log *tl; 6154 struct ocfs2_truncate_log *tl;
6176 6155
6177 mlog_entry_void();
6178
6179 if (OCFS2_I(tl_inode)->ip_blkno == le64_to_cpu(tl_copy->i_blkno)) { 6156 if (OCFS2_I(tl_inode)->ip_blkno == le64_to_cpu(tl_copy->i_blkno)) {
6180 mlog(ML_ERROR, "Asked to recover my own truncate log!\n"); 6157 mlog(ML_ERROR, "Asked to recover my own truncate log!\n");
6181 return -EINVAL; 6158 return -EINVAL;
@@ -6183,8 +6160,9 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
6183 6160
6184 tl = &tl_copy->id2.i_dealloc; 6161 tl = &tl_copy->id2.i_dealloc;
6185 num_recs = le16_to_cpu(tl->tl_used); 6162 num_recs = le16_to_cpu(tl->tl_used);
6186 mlog(0, "cleanup %u records from %llu\n", num_recs, 6163 trace_ocfs2_complete_truncate_log_recovery(
6187 (unsigned long long)le64_to_cpu(tl_copy->i_blkno)); 6164 (unsigned long long)le64_to_cpu(tl_copy->i_blkno),
6165 num_recs);
6188 6166
6189 mutex_lock(&tl_inode->i_mutex); 6167 mutex_lock(&tl_inode->i_mutex);
6190 for(i = 0; i < num_recs; i++) { 6168 for(i = 0; i < num_recs; i++) {
@@ -6219,7 +6197,6 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
6219bail_up: 6197bail_up:
6220 mutex_unlock(&tl_inode->i_mutex); 6198 mutex_unlock(&tl_inode->i_mutex);
6221 6199
6222 mlog_exit(status);
6223 return status; 6200 return status;
6224} 6201}
6225 6202
@@ -6228,8 +6205,6 @@ void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb)
6228 int status; 6205 int status;
6229 struct inode *tl_inode = osb->osb_tl_inode; 6206 struct inode *tl_inode = osb->osb_tl_inode;
6230 6207
6231 mlog_entry_void();
6232
6233 if (tl_inode) { 6208 if (tl_inode) {
6234 cancel_delayed_work(&osb->osb_truncate_log_wq); 6209 cancel_delayed_work(&osb->osb_truncate_log_wq);
6235 flush_workqueue(ocfs2_wq); 6210 flush_workqueue(ocfs2_wq);
@@ -6241,8 +6216,6 @@ void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb)
6241 brelse(osb->osb_tl_bh); 6216 brelse(osb->osb_tl_bh);
6242 iput(osb->osb_tl_inode); 6217 iput(osb->osb_tl_inode);
6243 } 6218 }
6244
6245 mlog_exit_void();
6246} 6219}
6247 6220
6248int ocfs2_truncate_log_init(struct ocfs2_super *osb) 6221int ocfs2_truncate_log_init(struct ocfs2_super *osb)
@@ -6251,8 +6224,6 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb)
6251 struct inode *tl_inode = NULL; 6224 struct inode *tl_inode = NULL;
6252 struct buffer_head *tl_bh = NULL; 6225 struct buffer_head *tl_bh = NULL;
6253 6226
6254 mlog_entry_void();
6255
6256 status = ocfs2_get_truncate_log_info(osb, 6227 status = ocfs2_get_truncate_log_info(osb,
6257 osb->slot_num, 6228 osb->slot_num,
6258 &tl_inode, 6229 &tl_inode,
@@ -6268,7 +6239,6 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb)
6268 osb->osb_tl_bh = tl_bh; 6239 osb->osb_tl_bh = tl_bh;
6269 osb->osb_tl_inode = tl_inode; 6240 osb->osb_tl_inode = tl_inode;
6270 6241
6271 mlog_exit(status);
6272 return status; 6242 return status;
6273} 6243}
6274 6244
@@ -6350,8 +6320,8 @@ static int ocfs2_free_cached_blocks(struct ocfs2_super *osb,
6350 else 6320 else
6351 bg_blkno = ocfs2_which_suballoc_group(head->free_blk, 6321 bg_blkno = ocfs2_which_suballoc_group(head->free_blk,
6352 head->free_bit); 6322 head->free_bit);
6353 mlog(0, "Free bit: (bit %u, blkno %llu)\n", 6323 trace_ocfs2_free_cached_blocks(
6354 head->free_bit, (unsigned long long)head->free_blk); 6324 (unsigned long long)head->free_blk, head->free_bit);
6355 6325
6356 ret = ocfs2_free_suballoc_bits(handle, inode, di_bh, 6326 ret = ocfs2_free_suballoc_bits(handle, inode, di_bh,
6357 head->free_bit, bg_blkno, 1); 6327 head->free_bit, bg_blkno, 1);
@@ -6404,8 +6374,7 @@ int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
6404 return ret; 6374 return ret;
6405 } 6375 }
6406 6376
6407 mlog(0, "Insert clusters: (bit %u, blk %llu)\n", 6377 trace_ocfs2_cache_cluster_dealloc((unsigned long long)blkno, bit);
6408 bit, (unsigned long long)blkno);
6409 6378
6410 item->free_blk = blkno; 6379 item->free_blk = blkno;
6411 item->free_bit = bit; 6380 item->free_bit = bit;
@@ -6480,8 +6449,8 @@ int ocfs2_run_deallocs(struct ocfs2_super *osb,
6480 fl = ctxt->c_first_suballocator; 6449 fl = ctxt->c_first_suballocator;
6481 6450
6482 if (fl->f_first) { 6451 if (fl->f_first) {
6483 mlog(0, "Free items: (type %u, slot %d)\n", 6452 trace_ocfs2_run_deallocs(fl->f_inode_type,
6484 fl->f_inode_type, fl->f_slot); 6453 fl->f_slot);
6485 ret2 = ocfs2_free_cached_blocks(osb, 6454 ret2 = ocfs2_free_cached_blocks(osb,
6486 fl->f_inode_type, 6455 fl->f_inode_type,
6487 fl->f_slot, 6456 fl->f_slot,
@@ -6558,8 +6527,9 @@ int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
6558 goto out; 6527 goto out;
6559 } 6528 }
6560 6529
6561 mlog(0, "Insert: (type %d, slot %u, bit %u, blk %llu)\n", 6530 trace_ocfs2_cache_block_dealloc(type, slot,
6562 type, slot, bit, (unsigned long long)blkno); 6531 (unsigned long long)suballoc,
6532 (unsigned long long)blkno, bit);
6563 6533
6564 item->free_bg = suballoc; 6534 item->free_bg = suballoc;
6565 item->free_blk = blkno; 6535 item->free_blk = blkno;
@@ -7005,8 +6975,6 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
7005 struct ocfs2_extent_tree et; 6975 struct ocfs2_extent_tree et;
7006 struct ocfs2_cached_dealloc_ctxt dealloc; 6976 struct ocfs2_cached_dealloc_ctxt dealloc;
7007 6977
7008 mlog_entry_void();
7009
7010 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh); 6978 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
7011 ocfs2_init_dealloc_ctxt(&dealloc); 6979 ocfs2_init_dealloc_ctxt(&dealloc);
7012 6980
@@ -7041,8 +7009,11 @@ start:
7041 goto bail; 7009 goto bail;
7042 } 7010 }
7043 7011
7044 mlog(0, "inode->ip_clusters = %u, tree_depth = %u\n", 7012 trace_ocfs2_commit_truncate(
7045 OCFS2_I(inode)->ip_clusters, path->p_tree_depth); 7013 (unsigned long long)OCFS2_I(inode)->ip_blkno,
7014 new_highest_cpos,
7015 OCFS2_I(inode)->ip_clusters,
7016 path->p_tree_depth);
7046 7017
7047 /* 7018 /*
7048 * By now, el will point to the extent list on the bottom most 7019 * By now, el will point to the extent list on the bottom most
@@ -7136,7 +7107,6 @@ bail:
7136 7107
7137 ocfs2_free_path(path); 7108 ocfs2_free_path(path);
7138 7109
7139 mlog_exit(status);
7140 return status; 7110 return status;
7141} 7111}
7142 7112
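The trace_ocfs2_*() calls that replace mlog() throughout alloc.c are ordinary Linux tracepoints. Their definitions live in the new fs/ocfs2/ocfs2_trace.h that this series introduces, which these hunks do not show. A minimal sketch of one such definition, assuming the standard TRACE_EVENT() pattern (the field names and print format are illustrative, not copied from the real header):

#undef TRACE_SYSTEM
#define TRACE_SYSTEM ocfs2

#if !defined(_TRACE_OCFS2_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_OCFS2_H

#include <linux/tracepoint.h>

/* Sketch only: mirrors the trace_ocfs2_insert_extent() call above. */
TRACE_EVENT(ocfs2_insert_extent,
	TP_PROTO(unsigned int appending, unsigned int contig,
		 int contig_index, int free_records, int tree_depth),
	TP_ARGS(appending, contig, contig_index, free_records, tree_depth),
	TP_STRUCT__entry(
		__field(unsigned int, appending)
		__field(unsigned int, contig)
		__field(int, contig_index)
		__field(int, free_records)
		__field(int, tree_depth)
	),
	TP_fast_assign(
		__entry->appending = appending;
		__entry->contig = contig;
		__entry->contig_index = contig_index;
		__entry->free_records = free_records;
		__entry->tree_depth = tree_depth;
	),
	TP_printk("append %u contig %u index %d free %d depth %d",
		  __entry->appending, __entry->contig, __entry->contig_index,
		  __entry->free_records, __entry->tree_depth)
);

#endif /* _TRACE_OCFS2_H */

/* This part must sit outside the include guard. */
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE ocfs2_trace
#include <trace/define_trace.h>

One compilation unit in the series defines CREATE_TRACE_POINTS before including the header so the event bodies are emitted exactly once; each event can then be toggled at runtime under /sys/kernel/debug/tracing/events/ocfs2/.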
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 1fbb0e20131b..ac97bca282d2 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -29,7 +29,6 @@
29#include <linux/mpage.h> 29#include <linux/mpage.h>
30#include <linux/quotaops.h> 30#include <linux/quotaops.h>
31 31
32#define MLOG_MASK_PREFIX ML_FILE_IO
33#include <cluster/masklog.h> 32#include <cluster/masklog.h>
34 33
35#include "ocfs2.h" 34#include "ocfs2.h"
@@ -45,6 +44,7 @@
45#include "super.h" 44#include "super.h"
46#include "symlink.h" 45#include "symlink.h"
47#include "refcounttree.h" 46#include "refcounttree.h"
47#include "ocfs2_trace.h"
48 48
49#include "buffer_head_io.h" 49#include "buffer_head_io.h"
50 50
@@ -59,8 +59,9 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
59 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 59 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
60 void *kaddr; 60 void *kaddr;
61 61
62 mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode, 62 trace_ocfs2_symlink_get_block(
63 (unsigned long long)iblock, bh_result, create); 63 (unsigned long long)OCFS2_I(inode)->ip_blkno,
64 (unsigned long long)iblock, bh_result, create);
64 65
65 BUG_ON(ocfs2_inode_is_fast_symlink(inode)); 66 BUG_ON(ocfs2_inode_is_fast_symlink(inode));
66 67
@@ -123,7 +124,6 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
123bail: 124bail:
124 brelse(bh); 125 brelse(bh);
125 126
126 mlog_exit(err);
127 return err; 127 return err;
128} 128}
129 129
@@ -136,8 +136,8 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock,
136 u64 p_blkno, count, past_eof; 136 u64 p_blkno, count, past_eof;
137 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 137 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
138 138
139 mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode, 139 trace_ocfs2_get_block((unsigned long long)OCFS2_I(inode)->ip_blkno,
140 (unsigned long long)iblock, bh_result, create); 140 (unsigned long long)iblock, bh_result, create);
141 141
142 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) 142 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE)
143 mlog(ML_NOTICE, "get_block on system inode 0x%p (%lu)\n", 143 mlog(ML_NOTICE, "get_block on system inode 0x%p (%lu)\n",
@@ -199,8 +199,9 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock,
199 } 199 }
200 200
201 past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); 201 past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
202 mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino, 202
203 (unsigned long long)past_eof); 203 trace_ocfs2_get_block_end((unsigned long long)OCFS2_I(inode)->ip_blkno,
204 (unsigned long long)past_eof);
204 if (create && (iblock >= past_eof)) 205 if (create && (iblock >= past_eof))
205 set_buffer_new(bh_result); 206 set_buffer_new(bh_result);
206 207
@@ -208,7 +209,6 @@ bail:
208 if (err < 0) 209 if (err < 0)
209 err = -EIO; 210 err = -EIO;
210 211
211 mlog_exit(err);
212 return err; 212 return err;
213} 213}
214 214
@@ -278,7 +278,8 @@ static int ocfs2_readpage(struct file *file, struct page *page)
278 loff_t start = (loff_t)page->index << PAGE_CACHE_SHIFT; 278 loff_t start = (loff_t)page->index << PAGE_CACHE_SHIFT;
279 int ret, unlock = 1; 279 int ret, unlock = 1;
280 280
281 mlog_entry("(0x%p, %lu)\n", file, (page ? page->index : 0)); 281 trace_ocfs2_readpage((unsigned long long)oi->ip_blkno,
282 (page ? page->index : 0));
282 283
283 ret = ocfs2_inode_lock_with_page(inode, NULL, 0, page); 284 ret = ocfs2_inode_lock_with_page(inode, NULL, 0, page);
284 if (ret != 0) { 285 if (ret != 0) {
@@ -323,7 +324,6 @@ out_inode_unlock:
323out: 324out:
324 if (unlock) 325 if (unlock)
325 unlock_page(page); 326 unlock_page(page);
326 mlog_exit(ret);
327 return ret; 327 return ret;
328} 328}
329 329
@@ -396,15 +396,11 @@ out_unlock:
396 */ 396 */
397static int ocfs2_writepage(struct page *page, struct writeback_control *wbc) 397static int ocfs2_writepage(struct page *page, struct writeback_control *wbc)
398{ 398{
399 int ret; 399 trace_ocfs2_writepage(
400 400 (unsigned long long)OCFS2_I(page->mapping->host)->ip_blkno,
401 mlog_entry("(0x%p)\n", page); 401 page->index);
402
403 ret = block_write_full_page(page, ocfs2_get_block, wbc);
404 402
405 mlog_exit(ret); 403 return block_write_full_page(page, ocfs2_get_block, wbc);
406
407 return ret;
408} 404}
409 405
410/* Taken from ext3. We don't necessarily need the full blown 406/* Taken from ext3. We don't necessarily need the full blown
@@ -450,7 +446,8 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
450 int err = 0; 446 int err = 0;
451 struct inode *inode = mapping->host; 447 struct inode *inode = mapping->host;
452 448
453 mlog_entry("(block = %llu)\n", (unsigned long long)block); 449 trace_ocfs2_bmap((unsigned long long)OCFS2_I(inode)->ip_blkno,
450 (unsigned long long)block);
454 451
455 /* We don't need to lock journal system files, since they aren't 452 /* We don't need to lock journal system files, since they aren't
456 * accessed concurrently from multiple nodes. 453 * accessed concurrently from multiple nodes.
@@ -484,8 +481,6 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
484bail: 481bail:
485 status = err ? 0 : p_blkno; 482 status = err ? 0 : p_blkno;
486 483
487 mlog_exit((int)status);
488
489 return status; 484 return status;
490} 485}
491 486
@@ -616,9 +611,6 @@ static ssize_t ocfs2_direct_IO(int rw,
616{ 611{
617 struct file *file = iocb->ki_filp; 612 struct file *file = iocb->ki_filp;
618 struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; 613 struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
619 int ret;
620
621 mlog_entry_void();
622 614
623 /* 615 /*
624 * Fallback to buffered I/O if we see an inode without 616 * Fallback to buffered I/O if we see an inode without
@@ -631,13 +623,10 @@ static ssize_t ocfs2_direct_IO(int rw,
631 if (i_size_read(inode) <= offset) 623 if (i_size_read(inode) <= offset)
632 return 0; 624 return 0;
633 625
634 ret = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, 626 return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
635 iov, offset, nr_segs, 627 iov, offset, nr_segs,
636 ocfs2_direct_IO_get_blocks, 628 ocfs2_direct_IO_get_blocks,
637 ocfs2_dio_end_io, NULL, 0); 629 ocfs2_dio_end_io, NULL, 0);
638
639 mlog_exit(ret);
640 return ret;
641} 630}
642 631
643static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb, 632static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb,
@@ -1026,6 +1015,12 @@ static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno,
1026 ocfs2_figure_cluster_boundaries(OCFS2_SB(inode->i_sb), cpos, 1015 ocfs2_figure_cluster_boundaries(OCFS2_SB(inode->i_sb), cpos,
1027 &cluster_start, &cluster_end); 1016 &cluster_start, &cluster_end);
1028 1017
 1018 /* treat the write as new if a hole/lseek spanned across
1019 * the page boundary.
1020 */
1021 new = new | ((i_size_read(inode) <= page_offset(page)) &&
1022 (page_offset(page) <= user_pos));
1023
1029 if (page == wc->w_target_page) { 1024 if (page == wc->w_target_page) {
1030 map_from = user_pos & (PAGE_CACHE_SIZE - 1); 1025 map_from = user_pos & (PAGE_CACHE_SIZE - 1);
1031 map_to = map_from + user_len; 1026 map_to = map_from + user_len;
@@ -1534,9 +1529,9 @@ static int ocfs2_try_to_write_inline_data(struct address_space *mapping,
1534 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1529 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1535 struct ocfs2_dinode *di = NULL; 1530 struct ocfs2_dinode *di = NULL;
1536 1531
1537 mlog(0, "Inode %llu, write of %u bytes at off %llu. features: 0x%x\n", 1532 trace_ocfs2_try_to_write_inline_data((unsigned long long)oi->ip_blkno,
1538 (unsigned long long)oi->ip_blkno, len, (unsigned long long)pos, 1533 len, (unsigned long long)pos,
1539 oi->ip_dyn_features); 1534 oi->ip_dyn_features);
1540 1535
1541 /* 1536 /*
1542 * Handle inodes which already have inline data 1st. 1537 * Handle inodes which already have inline data 1st.
@@ -1739,6 +1734,13 @@ try_again:
1739 1734
1740 di = (struct ocfs2_dinode *)wc->w_di_bh->b_data; 1735 di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
1741 1736
1737 trace_ocfs2_write_begin_nolock(
1738 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1739 (long long)i_size_read(inode),
1740 le32_to_cpu(di->i_clusters),
1741 pos, len, flags, mmap_page,
1742 clusters_to_alloc, extents_to_split);
1743
1742 /* 1744 /*
1743 * We set w_target_from, w_target_to here so that 1745 * We set w_target_from, w_target_to here so that
1744 * ocfs2_write_end() knows which range in the target page to 1746 * ocfs2_write_end() knows which range in the target page to
@@ -1751,12 +1753,6 @@ try_again:
1751 * ocfs2_lock_allocators(). It greatly over-estimates 1753 * ocfs2_lock_allocators(). It greatly over-estimates
1752 * the work to be done. 1754 * the work to be done.
1753 */ 1755 */
1754 mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u,"
1755 " clusters_to_add = %u, extents_to_split = %u\n",
1756 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1757 (long long)i_size_read(inode), le32_to_cpu(di->i_clusters),
1758 clusters_to_alloc, extents_to_split);
1759
1760 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), 1756 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode),
1761 wc->w_di_bh); 1757 wc->w_di_bh);
1762 ret = ocfs2_lock_allocators(inode, &et, 1758 ret = ocfs2_lock_allocators(inode, &et,
@@ -1938,8 +1934,8 @@ static void ocfs2_write_end_inline(struct inode *inode, loff_t pos,
1938 memcpy(di->id2.i_data.id_data + pos, kaddr + pos, *copied); 1934 memcpy(di->id2.i_data.id_data + pos, kaddr + pos, *copied);
1939 kunmap_atomic(kaddr, KM_USER0); 1935 kunmap_atomic(kaddr, KM_USER0);
1940 1936
1941 mlog(0, "Data written to inode at offset %llu. " 1937 trace_ocfs2_write_end_inline(
1942 "id_count = %u, copied = %u, i_dyn_features = 0x%x\n", 1938 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1943 (unsigned long long)pos, *copied, 1939 (unsigned long long)pos, *copied,
1944 le16_to_cpu(di->id2.i_data.id_count), 1940 le16_to_cpu(di->id2.i_data.id_count),
1945 le16_to_cpu(di->i_dyn_features)); 1941 le16_to_cpu(di->i_dyn_features));
@@ -2043,7 +2039,6 @@ const struct address_space_operations ocfs2_aops = {
2043 .write_begin = ocfs2_write_begin, 2039 .write_begin = ocfs2_write_begin,
2044 .write_end = ocfs2_write_end, 2040 .write_end = ocfs2_write_end,
2045 .bmap = ocfs2_bmap, 2041 .bmap = ocfs2_bmap,
2046 .sync_page = block_sync_page,
2047 .direct_IO = ocfs2_direct_IO, 2042 .direct_IO = ocfs2_direct_IO,
2048 .invalidatepage = ocfs2_invalidatepage, 2043 .invalidatepage = ocfs2_invalidatepage,
2049 .releasepage = ocfs2_releasepage, 2044 .releasepage = ocfs2_releasepage,
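One behavioral change hides among the tracepoint conversions in ocfs2_prepare_page_for_write() above: a page is now also treated as new when a hole or lseek spans the page boundary. A worked example of when the added condition fires, with illustrative numbers and 4K pages:

	/*
	 * Suppose i_size = 100 and a write begins at user_pos = 5000.
	 * For the page at index 1, page_offset(page) = 4096, so both
	 * halves of the added test hold:
	 *
	 *	i_size_read(inode) <= page_offset(page)    (100  <= 4096)
	 *	page_offset(page)  <= user_pos             (4096 <= 5000)
	 *
	 * The page lies entirely in the hole past EOF, so it is marked
	 * new and zero-filled rather than being read back from disk.
	 */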
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index eceb456037c1..75cf3ad987a6 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -71,7 +71,7 @@ static inline void ocfs2_iocb_set_rw_locked(struct kiocb *iocb, int level)
71 71
72/* 72/*
73 * Using a named enum representing lock types in terms of #N bit stored in 73 * Using a named enum representing lock types in terms of #N bit stored in
74 * iocb->private, which is going to be used for communication bewteen 74 * iocb->private, which is going to be used for communication between
75 * ocfs2_dio_end_io() and ocfs2_file_aio_write/read(). 75 * ocfs2_dio_end_io() and ocfs2_file_aio_write/read().
76 */ 76 */
77enum ocfs2_iocb_lock_bits { 77enum ocfs2_iocb_lock_bits {
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index f9d5d3ffc75a..5d18ad10c27f 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -35,8 +35,8 @@
35#include "inode.h" 35#include "inode.h"
36#include "journal.h" 36#include "journal.h"
37#include "uptodate.h" 37#include "uptodate.h"
38
39#include "buffer_head_io.h" 38#include "buffer_head_io.h"
39#include "ocfs2_trace.h"
40 40
41/* 41/*
42 * Bits on bh->b_state used by ocfs2. 42 * Bits on bh->b_state used by ocfs2.
@@ -55,8 +55,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
55{ 55{
56 int ret = 0; 56 int ret = 0;
57 57
58 mlog_entry("(bh->b_blocknr = %llu, ci=%p)\n", 58 trace_ocfs2_write_block((unsigned long long)bh->b_blocknr, ci);
59 (unsigned long long)bh->b_blocknr, ci);
60 59
61 BUG_ON(bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO); 60 BUG_ON(bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO);
62 BUG_ON(buffer_jbd(bh)); 61 BUG_ON(buffer_jbd(bh));
@@ -66,6 +65,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
66 * can get modified during recovery even if read-only. */ 65 * can get modified during recovery even if read-only. */
67 if (ocfs2_is_hard_readonly(osb)) { 66 if (ocfs2_is_hard_readonly(osb)) {
68 ret = -EROFS; 67 ret = -EROFS;
68 mlog_errno(ret);
69 goto out; 69 goto out;
70 } 70 }
71 71
@@ -91,11 +91,11 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
91 * uptodate. */ 91 * uptodate. */
92 ret = -EIO; 92 ret = -EIO;
93 put_bh(bh); 93 put_bh(bh);
94 mlog_errno(ret);
94 } 95 }
95 96
96 ocfs2_metadata_cache_io_unlock(ci); 97 ocfs2_metadata_cache_io_unlock(ci);
97out: 98out:
98 mlog_exit(ret);
99 return ret; 99 return ret;
100} 100}
101 101
@@ -106,10 +106,10 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
106 unsigned int i; 106 unsigned int i;
107 struct buffer_head *bh; 107 struct buffer_head *bh;
108 108
109 if (!nr) { 109 trace_ocfs2_read_blocks_sync((unsigned long long)block, nr);
110 mlog(ML_BH_IO, "No buffers will be read!\n"); 110
111 if (!nr)
111 goto bail; 112 goto bail;
112 }
113 113
114 for (i = 0 ; i < nr ; i++) { 114 for (i = 0 ; i < nr ; i++) {
115 if (bhs[i] == NULL) { 115 if (bhs[i] == NULL) {
@@ -123,10 +123,8 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
123 bh = bhs[i]; 123 bh = bhs[i];
124 124
125 if (buffer_jbd(bh)) { 125 if (buffer_jbd(bh)) {
126 mlog(ML_BH_IO, 126 trace_ocfs2_read_blocks_sync_jbd(
127 "trying to sync read a jbd " 127 (unsigned long long)bh->b_blocknr);
128 "managed bh (blocknr = %llu), skipping\n",
129 (unsigned long long)bh->b_blocknr);
130 continue; 128 continue;
131 } 129 }
132 130
@@ -186,8 +184,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
186 struct buffer_head *bh; 184 struct buffer_head *bh;
187 struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 185 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
188 186
189 mlog_entry("(ci=%p, block=(%llu), nr=(%d), flags=%d)\n", 187 trace_ocfs2_read_blocks_begin(ci, (unsigned long long)block, nr, flags);
190 ci, (unsigned long long)block, nr, flags);
191 188
192 BUG_ON(!ci); 189 BUG_ON(!ci);
193 BUG_ON((flags & OCFS2_BH_READAHEAD) && 190 BUG_ON((flags & OCFS2_BH_READAHEAD) &&
@@ -207,7 +204,6 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
207 } 204 }
208 205
209 if (nr == 0) { 206 if (nr == 0) {
210 mlog(ML_BH_IO, "No buffers will be read!\n");
211 status = 0; 207 status = 0;
212 goto bail; 208 goto bail;
213 } 209 }
@@ -251,8 +247,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
251 */ 247 */
252 248
253 if (!ignore_cache && !ocfs2_buffer_uptodate(ci, bh)) { 249 if (!ignore_cache && !ocfs2_buffer_uptodate(ci, bh)) {
254 mlog(ML_UPTODATE, 250 trace_ocfs2_read_blocks_from_disk(
255 "bh (%llu), owner %llu not uptodate\n",
256 (unsigned long long)bh->b_blocknr, 251 (unsigned long long)bh->b_blocknr,
257 (unsigned long long)ocfs2_metadata_cache_owner(ci)); 252 (unsigned long long)ocfs2_metadata_cache_owner(ci));
258 /* We're using ignore_cache here to say 253 /* We're using ignore_cache here to say
@@ -260,11 +255,10 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
260 ignore_cache = 1; 255 ignore_cache = 1;
261 } 256 }
262 257
258 trace_ocfs2_read_blocks_bh((unsigned long long)bh->b_blocknr,
259 ignore_cache, buffer_jbd(bh), buffer_dirty(bh));
260
263 if (buffer_jbd(bh)) { 261 if (buffer_jbd(bh)) {
264 if (ignore_cache)
265 mlog(ML_BH_IO, "trying to sync read a jbd "
266 "managed bh (blocknr = %llu)\n",
267 (unsigned long long)bh->b_blocknr);
268 continue; 262 continue;
269 } 263 }
270 264
@@ -272,9 +266,6 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
272 if (buffer_dirty(bh)) { 266 if (buffer_dirty(bh)) {
273 /* This should probably be a BUG, or 267 /* This should probably be a BUG, or
274 * at least return an error. */ 268 * at least return an error. */
275 mlog(ML_BH_IO, "asking me to sync read a dirty "
276 "buffer! (blocknr = %llu)\n",
277 (unsigned long long)bh->b_blocknr);
278 continue; 269 continue;
279 } 270 }
280 271
@@ -367,14 +358,11 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
367 } 358 }
368 ocfs2_metadata_cache_io_unlock(ci); 359 ocfs2_metadata_cache_io_unlock(ci);
369 360
370 mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", 361 trace_ocfs2_read_blocks_end((unsigned long long)block, nr,
371 (unsigned long long)block, nr, 362 flags, ignore_cache);
372 ((flags & OCFS2_BH_IGNORE_CACHE) || ignore_cache) ? "no" : "yes",
373 flags);
374 363
375bail: 364bail:
376 365
377 mlog_exit(status);
378 return status; 366 return status;
379} 367}
380 368
@@ -408,13 +396,12 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
408 int ret = 0; 396 int ret = 0;
409 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; 397 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
410 398
411 mlog_entry_void();
412
413 BUG_ON(buffer_jbd(bh)); 399 BUG_ON(buffer_jbd(bh));
414 ocfs2_check_super_or_backup(osb->sb, bh->b_blocknr); 400 ocfs2_check_super_or_backup(osb->sb, bh->b_blocknr);
415 401
416 if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) { 402 if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) {
417 ret = -EROFS; 403 ret = -EROFS;
404 mlog_errno(ret);
418 goto out; 405 goto out;
419 } 406 }
420 407
@@ -434,9 +421,9 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
434 if (!buffer_uptodate(bh)) { 421 if (!buffer_uptodate(bh)) {
435 ret = -EIO; 422 ret = -EIO;
436 put_bh(bh); 423 put_bh(bh);
424 mlog_errno(ret);
437 } 425 }
438 426
439out: 427out:
440 mlog_exit(ret);
441 return ret; 428 return ret;
442} 429}
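Besides swapping mlog(ML_BH_IO, ...) for tracepoints, buffer_head_io.c shows the other pattern this series applies throughout: the blanket mlog_exit(status) at the bottom of a function is dropped, and mlog_errno() is added at each site where an error code is actually produced. A minimal sketch of the resulting shape (the function is hypothetical; ocfs2_is_hard_readonly() and mlog_errno() are the real helpers used above):

static int example_readonly_check(struct ocfs2_super *osb)
{
	int ret = 0;

	if (ocfs2_is_hard_readonly(osb)) {
		ret = -EROFS;
		mlog_errno(ret);	/* report at the failure site... */
		goto out;
	}

	/* ... actual I/O ... */
out:
	return ret;			/* ...no mlog_exit() on the way out */
}

The upshot is that only genuine failures are logged, while routine entry/exit flow becomes the tracepoints' job.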
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index b108e863d8f6..643720209a98 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -367,11 +367,7 @@ static inline void o2hb_bio_wait_dec(struct o2hb_bio_wait_ctxt *wc,
367static void o2hb_wait_on_io(struct o2hb_region *reg, 367static void o2hb_wait_on_io(struct o2hb_region *reg,
368 struct o2hb_bio_wait_ctxt *wc) 368 struct o2hb_bio_wait_ctxt *wc)
369{ 369{
370 struct address_space *mapping = reg->hr_bdev->bd_inode->i_mapping;
371
372 blk_run_address_space(mapping);
373 o2hb_bio_wait_dec(wc, 1); 370 o2hb_bio_wait_dec(wc, 1);
374
375 wait_for_completion(&wc->wc_io_complete); 371 wait_for_completion(&wc->wc_io_complete);
376} 372}
377 373
@@ -1658,8 +1654,6 @@ static int o2hb_populate_slot_data(struct o2hb_region *reg)
1658 struct o2hb_disk_slot *slot; 1654 struct o2hb_disk_slot *slot;
1659 struct o2hb_disk_heartbeat_block *hb_block; 1655 struct o2hb_disk_heartbeat_block *hb_block;
1660 1656
1661 mlog_entry_void();
1662
1663 ret = o2hb_read_slots(reg, reg->hr_blocks); 1657 ret = o2hb_read_slots(reg, reg->hr_blocks);
1664 if (ret) { 1658 if (ret) {
1665 mlog_errno(ret); 1659 mlog_errno(ret);
@@ -1681,7 +1675,6 @@ static int o2hb_populate_slot_data(struct o2hb_region *reg)
1681 } 1675 }
1682 1676
1683out: 1677out:
1684 mlog_exit(ret);
1685 return ret; 1678 return ret;
1686} 1679}
1687 1680
@@ -2282,7 +2275,7 @@ void o2hb_free_hb_set(struct config_group *group)
2282 kfree(hs); 2275 kfree(hs);
2283} 2276}
2284 2277
2285/* hb callback registration and issueing */ 2278/* hb callback registration and issuing */
2286 2279
2287static struct o2hb_callback *hbcall_from_type(enum o2hb_callback_type type) 2280static struct o2hb_callback *hbcall_from_type(enum o2hb_callback_type type)
2288{ 2281{
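The o2hb_wait_on_io() hunk above is not part of the logging conversion: it tracks the block layer's move to on-stack plugging in the same kernel cycle, which removed blk_run_address_space(). Submitted bios are now kicked off without an explicit unplug, so the helper only has to drop its pending-bio count and wait for the completion.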
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index 6c61771469af..07ac24fd9252 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -30,7 +30,7 @@
30 30
31struct mlog_bits mlog_and_bits = MLOG_BITS_RHS(MLOG_INITIAL_AND_MASK); 31struct mlog_bits mlog_and_bits = MLOG_BITS_RHS(MLOG_INITIAL_AND_MASK);
32EXPORT_SYMBOL_GPL(mlog_and_bits); 32EXPORT_SYMBOL_GPL(mlog_and_bits);
33struct mlog_bits mlog_not_bits = MLOG_BITS_RHS(MLOG_INITIAL_NOT_MASK); 33struct mlog_bits mlog_not_bits = MLOG_BITS_RHS(0);
34EXPORT_SYMBOL_GPL(mlog_not_bits); 34EXPORT_SYMBOL_GPL(mlog_not_bits);
35 35
36static ssize_t mlog_mask_show(u64 mask, char *buf) 36static ssize_t mlog_mask_show(u64 mask, char *buf)
@@ -80,8 +80,6 @@ struct mlog_attribute {
80} 80}
81 81
82static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = { 82static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
83 define_mask(ENTRY),
84 define_mask(EXIT),
85 define_mask(TCP), 83 define_mask(TCP),
86 define_mask(MSG), 84 define_mask(MSG),
87 define_mask(SOCKET), 85 define_mask(SOCKET),
@@ -93,27 +91,11 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
93 define_mask(DLM_THREAD), 91 define_mask(DLM_THREAD),
94 define_mask(DLM_MASTER), 92 define_mask(DLM_MASTER),
95 define_mask(DLM_RECOVERY), 93 define_mask(DLM_RECOVERY),
96 define_mask(AIO),
97 define_mask(JOURNAL),
98 define_mask(DISK_ALLOC),
99 define_mask(SUPER),
100 define_mask(FILE_IO),
101 define_mask(EXTENT_MAP),
102 define_mask(DLM_GLUE), 94 define_mask(DLM_GLUE),
103 define_mask(BH_IO),
104 define_mask(UPTODATE),
105 define_mask(NAMEI),
106 define_mask(INODE),
107 define_mask(VOTE), 95 define_mask(VOTE),
108 define_mask(DCACHE),
109 define_mask(CONN), 96 define_mask(CONN),
110 define_mask(QUORUM), 97 define_mask(QUORUM),
111 define_mask(EXPORT),
112 define_mask(XATTR),
113 define_mask(QUOTA),
114 define_mask(REFCOUNT),
115 define_mask(BASTS), 98 define_mask(BASTS),
116 define_mask(RESERVATIONS),
117 define_mask(CLUSTER), 99 define_mask(CLUSTER),
118 define_mask(ERROR), 100 define_mask(ERROR),
119 define_mask(NOTICE), 101 define_mask(NOTICE),
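The table trimmed here has to shrink in lockstep with the ML_* bits deleted from masklog.h in the next hunk. Each define_mask(X) entry binds a sysfs attribute name to its mask bit; the macro, defined just above this table in masklog.c (outside the hunk), expands roughly to:

#define define_mask(_name) {			\
	.attr = {				\
		.name = #_name,			\
		.mode = S_IRUGO | S_IWUSR,	\
	},					\
	.mask = ML_##_name,			\
}

Because the initializer references ML_##_name directly, an entry left behind for a deleted ML_* constant would fail to compile, which is why masklog.c and masklog.h change in the same patch.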
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 34d6544357d9..baa2b9ef7eef 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -82,41 +82,23 @@
82 82
83/* bits that are frequently given and infrequently matched in the low word */ 83/* bits that are frequently given and infrequently matched in the low word */
84/* NOTE: If you add a flag, you need to also update masklog.c! */ 84/* NOTE: If you add a flag, you need to also update masklog.c! */
85#define ML_ENTRY 0x0000000000000001ULL /* func call entry */ 85#define ML_TCP 0x0000000000000001ULL /* net cluster/tcp.c */
86#define ML_EXIT 0x0000000000000002ULL /* func call exit */ 86#define ML_MSG 0x0000000000000002ULL /* net network messages */
87#define ML_TCP 0x0000000000000004ULL /* net cluster/tcp.c */ 87#define ML_SOCKET 0x0000000000000004ULL /* net socket lifetime */
88#define ML_MSG 0x0000000000000008ULL /* net network messages */ 88#define ML_HEARTBEAT 0x0000000000000008ULL /* hb all heartbeat tracking */
89#define ML_SOCKET 0x0000000000000010ULL /* net socket lifetime */ 89#define ML_HB_BIO 0x0000000000000010ULL /* hb io tracing */
90#define ML_HEARTBEAT 0x0000000000000020ULL /* hb all heartbeat tracking */ 90#define ML_DLMFS 0x0000000000000020ULL /* dlm user dlmfs */
91#define ML_HB_BIO 0x0000000000000040ULL /* hb io tracing */ 91#define ML_DLM 0x0000000000000040ULL /* dlm general debugging */
92#define ML_DLMFS 0x0000000000000080ULL /* dlm user dlmfs */ 92#define ML_DLM_DOMAIN 0x0000000000000080ULL /* dlm domain debugging */
93#define ML_DLM 0x0000000000000100ULL /* dlm general debugging */ 93#define ML_DLM_THREAD 0x0000000000000100ULL /* dlm domain thread */
94#define ML_DLM_DOMAIN 0x0000000000000200ULL /* dlm domain debugging */ 94#define ML_DLM_MASTER 0x0000000000000200ULL /* dlm master functions */
95#define ML_DLM_THREAD 0x0000000000000400ULL /* dlm domain thread */ 95#define ML_DLM_RECOVERY 0x0000000000000400ULL /* dlm master functions */
96#define ML_DLM_MASTER 0x0000000000000800ULL /* dlm master functions */ 96#define ML_DLM_GLUE 0x0000000000000800ULL /* ocfs2 dlm glue layer */
97#define ML_DLM_RECOVERY 0x0000000000001000ULL /* dlm master functions */ 97#define ML_VOTE 0x0000000000001000ULL /* ocfs2 node messaging */
98#define ML_AIO 0x0000000000002000ULL /* ocfs2 aio read and write */ 98#define ML_CONN 0x0000000000002000ULL /* net connection management */
99#define ML_JOURNAL 0x0000000000004000ULL /* ocfs2 journalling functions */ 99#define ML_QUORUM 0x0000000000004000ULL /* net connection quorum */
100#define ML_DISK_ALLOC 0x0000000000008000ULL /* ocfs2 disk allocation */ 100#define ML_BASTS 0x0000000000008000ULL /* dlmglue asts and basts */
101#define ML_SUPER 0x0000000000010000ULL /* ocfs2 mount / umount */ 101#define ML_CLUSTER 0x0000000000010000ULL /* cluster stack */
102#define ML_FILE_IO 0x0000000000020000ULL /* ocfs2 file I/O */
103#define ML_EXTENT_MAP 0x0000000000040000ULL /* ocfs2 extent map caching */
104#define ML_DLM_GLUE 0x0000000000080000ULL /* ocfs2 dlm glue layer */
105#define ML_BH_IO 0x0000000000100000ULL /* ocfs2 buffer I/O */
106#define ML_UPTODATE 0x0000000000200000ULL /* ocfs2 caching sequence #'s */
107#define ML_NAMEI 0x0000000000400000ULL /* ocfs2 directory / namespace */
108#define ML_INODE 0x0000000000800000ULL /* ocfs2 inode manipulation */
109#define ML_VOTE 0x0000000001000000ULL /* ocfs2 node messaging */
110#define ML_DCACHE 0x0000000002000000ULL /* ocfs2 dcache operations */
111#define ML_CONN 0x0000000004000000ULL /* net connection management */
112#define ML_QUORUM 0x0000000008000000ULL /* net connection quorum */
113#define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */
114#define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */
115#define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */
116#define ML_REFCOUNT 0x0000000080000000ULL /* refcount tree operations */
117#define ML_BASTS 0x0000000100000000ULL /* dlmglue asts and basts */
118#define ML_RESERVATIONS 0x0000000200000000ULL /* ocfs2 alloc reservations */
119#define ML_CLUSTER 0x0000000400000000ULL /* cluster stack */
120 102
121/* bits that are infrequently given and frequently matched in the high word */ 103/* bits that are infrequently given and frequently matched in the high word */
122#define ML_ERROR 0x1000000000000000ULL /* sent to KERN_ERR */ 104#define ML_ERROR 0x1000000000000000ULL /* sent to KERN_ERR */
@@ -124,7 +106,6 @@
124#define ML_KTHREAD 0x4000000000000000ULL /* kernel thread activity */ 106#define ML_KTHREAD 0x4000000000000000ULL /* kernel thread activity */
125 107
126#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE) 108#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE)
127#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT)
128#ifndef MLOG_MASK_PREFIX 109#ifndef MLOG_MASK_PREFIX
129#define MLOG_MASK_PREFIX 0 110#define MLOG_MASK_PREFIX 0
130#endif 111#endif
@@ -222,58 +203,6 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
222 mlog(ML_ERROR, "status = %lld\n", (long long)_st); \ 203 mlog(ML_ERROR, "status = %lld\n", (long long)_st); \
223} while (0) 204} while (0)
224 205
225#if defined(CONFIG_OCFS2_DEBUG_MASKLOG)
226#define mlog_entry(fmt, args...) do { \
227 mlog(ML_ENTRY, "ENTRY:" fmt , ##args); \
228} while (0)
229
230#define mlog_entry_void() do { \
231 mlog(ML_ENTRY, "ENTRY:\n"); \
232} while (0)
233
234/*
235 * We disable this for sparse.
236 */
237#if !defined(__CHECKER__)
238#define mlog_exit(st) do { \
239 if (__builtin_types_compatible_p(typeof(st), unsigned long)) \
240 mlog(ML_EXIT, "EXIT: %lu\n", (unsigned long) (st)); \
241 else if (__builtin_types_compatible_p(typeof(st), signed long)) \
242 mlog(ML_EXIT, "EXIT: %ld\n", (signed long) (st)); \
243 else if (__builtin_types_compatible_p(typeof(st), unsigned int) \
244 || __builtin_types_compatible_p(typeof(st), unsigned short) \
245 || __builtin_types_compatible_p(typeof(st), unsigned char)) \
246 mlog(ML_EXIT, "EXIT: %u\n", (unsigned int) (st)); \
247 else if (__builtin_types_compatible_p(typeof(st), signed int) \
248 || __builtin_types_compatible_p(typeof(st), signed short) \
249 || __builtin_types_compatible_p(typeof(st), signed char)) \
250 mlog(ML_EXIT, "EXIT: %d\n", (signed int) (st)); \
251 else if (__builtin_types_compatible_p(typeof(st), long long)) \
252 mlog(ML_EXIT, "EXIT: %lld\n", (long long) (st)); \
253 else \
254 mlog(ML_EXIT, "EXIT: %llu\n", (unsigned long long) (st)); \
255} while (0)
256#else
257#define mlog_exit(st) do { \
258 mlog(ML_EXIT, "EXIT: %lld\n", (long long) (st)); \
259} while (0)
260#endif
261
262#define mlog_exit_ptr(ptr) do { \
263 mlog(ML_EXIT, "EXIT: %p\n", ptr); \
264} while (0)
265
266#define mlog_exit_void() do { \
267 mlog(ML_EXIT, "EXIT\n"); \
268} while (0)
269#else
270#define mlog_entry(...) do { } while (0)
271#define mlog_entry_void(...) do { } while (0)
272#define mlog_exit(...) do { } while (0)
273#define mlog_exit_ptr(...) do { } while (0)
274#define mlog_exit_void(...) do { } while (0)
275#endif /* defined(CONFIG_OCFS2_DEBUG_MASKLOG) */
276
277#define mlog_bug_on_msg(cond, fmt, args...) do { \ 206#define mlog_bug_on_msg(cond, fmt, args...) do { \
278 if (cond) { \ 207 if (cond) { \
279 mlog(ML_ERROR, "bug expression: " #cond "\n"); \ 208 mlog(ML_ERROR, "bug expression: " #cond "\n"); \
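With mlog_entry()/mlog_exit() deleted from masklog.h, per-function tracing moves wholesale to tracepoints. A hypothetical before/after of a call site (function and event names are invented; trace_ocfs2_do_work would need a matching TRACE_EVENT definition in ocfs2_trace.h):

/* Before: entry/exit pairs gated by the global ML_ENTRY/ML_EXIT bits. */
static int ocfs2_do_work_old(struct inode *inode)
{
	int status = 0;

	mlog_entry("(inode %llu)\n",
		   (unsigned long long)OCFS2_I(inode)->ip_blkno);

	/* ... actual work ... */

	mlog_exit(status);
	return status;
}

/* After: one typed tracepoint, enabled per event through tracefs. */
static int ocfs2_do_work(struct inode *inode)
{
	trace_ocfs2_do_work((unsigned long long)OCFS2_I(inode)->ip_blkno);

	/* ... actual work ... */

	return 0;
}

The tracepoint form also explains the MLOG_INITIAL_NOT_MASK removal above: with ML_ENTRY and ML_EXIT gone there is nothing left to suppress by default, so mlog_not_bits is initialized to 0.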
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index a87366750f23..8f9cea1597af 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -89,7 +89,7 @@ static void o2quo_fence_self(void)
89 }; 89 };
90} 90}
91 91
 92/* Indicate that a timeout occured on a heartbeat region write. The 92/* Indicate that a timeout occurred on a heartbeat region write. The
93 * other nodes in the cluster may consider us dead at that time so we 93 * other nodes in the cluster may consider us dead at that time so we
94 * want to "fence" ourselves so that we don't scribble on the disk 94 * want to "fence" ourselves so that we don't scribble on the disk
95 * after they think they've recovered us. This can't solve all 95 * after they think they've recovered us. This can't solve all
@@ -261,7 +261,7 @@ void o2quo_hb_still_up(u8 node)
261 spin_unlock(&qs->qs_lock); 261 spin_unlock(&qs->qs_lock);
262} 262}
263 263
264/* This is analagous to hb_up. as a node's connection comes up we delay the 264/* This is analogous to hb_up. as a node's connection comes up we delay the
265 * quorum decision until we see it heartbeating. the hold will be dropped in 265 * quorum decision until we see it heartbeating. the hold will be dropped in
266 * hb_up or hb_down. it might be perpetuated by con_err until hb_down. if 266 * hb_up or hb_down. it might be perpetuated by con_err until hb_down. if
267 * it's already heartbeating we might be dropping a hold that conn_up got. 267 * it's already heartbeating we might be dropping a hold that conn_up got.
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 3b11cb1e38fc..db5ee4b4f47a 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -210,10 +210,6 @@ static inline void o2net_set_func_stop_time(struct o2net_sock_container *sc)
210 sc->sc_tv_func_stop = ktime_get(); 210 sc->sc_tv_func_stop = ktime_get();
211} 211}
212 212
213static ktime_t o2net_get_func_run_time(struct o2net_sock_container *sc)
214{
215 return ktime_sub(sc->sc_tv_func_stop, sc->sc_tv_func_start);
216}
217#else /* CONFIG_DEBUG_FS */ 213#else /* CONFIG_DEBUG_FS */
218# define o2net_init_nst(a, b, c, d, e) 214# define o2net_init_nst(a, b, c, d, e)
219# define o2net_set_nst_sock_time(a) 215# define o2net_set_nst_sock_time(a)
@@ -227,10 +223,14 @@ static ktime_t o2net_get_func_run_time(struct o2net_sock_container *sc)
227# define o2net_set_advance_stop_time(a) 223# define o2net_set_advance_stop_time(a)
228# define o2net_set_func_start_time(a) 224# define o2net_set_func_start_time(a)
229# define o2net_set_func_stop_time(a) 225# define o2net_set_func_stop_time(a)
230# define o2net_get_func_run_time(a) (ktime_t)0
231#endif /* CONFIG_DEBUG_FS */ 226#endif /* CONFIG_DEBUG_FS */
232 227
233#ifdef CONFIG_OCFS2_FS_STATS 228#ifdef CONFIG_OCFS2_FS_STATS
229static ktime_t o2net_get_func_run_time(struct o2net_sock_container *sc)
230{
231 return ktime_sub(sc->sc_tv_func_stop, sc->sc_tv_func_start);
232}
233
234static void o2net_update_send_stats(struct o2net_send_tracking *nst, 234static void o2net_update_send_stats(struct o2net_send_tracking *nst,
235 struct o2net_sock_container *sc) 235 struct o2net_sock_container *sc)
236{ 236{
@@ -565,7 +565,7 @@ static void o2net_set_nn_state(struct o2net_node *nn,
565 * the work queue actually being up. */ 565 * the work queue actually being up. */
566 if (!valid && o2net_wq) { 566 if (!valid && o2net_wq) {
567 unsigned long delay; 567 unsigned long delay;
568 /* delay if we're withing a RECONNECT_DELAY of the 568 /* delay if we're within a RECONNECT_DELAY of the
569 * last attempt */ 569 * last attempt */
570 delay = (nn->nn_last_connect_attempt + 570 delay = (nn->nn_last_connect_attempt +
571 msecs_to_jiffies(o2net_reconnect_delay())) 571 msecs_to_jiffies(o2net_reconnect_delay()))
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 7eb90403fc8a..e5ba34818332 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -28,7 +28,6 @@
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/namei.h> 29#include <linux/namei.h>
30 30
31#define MLOG_MASK_PREFIX ML_DCACHE
32#include <cluster/masklog.h> 31#include <cluster/masklog.h>
33 32
34#include "ocfs2.h" 33#include "ocfs2.h"
@@ -39,6 +38,7 @@
39#include "file.h" 38#include "file.h"
40#include "inode.h" 39#include "inode.h"
41#include "super.h" 40#include "super.h"
41#include "ocfs2_trace.h"
42 42
43void ocfs2_dentry_attach_gen(struct dentry *dentry) 43void ocfs2_dentry_attach_gen(struct dentry *dentry)
44{ 44{
@@ -62,8 +62,8 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
62 inode = dentry->d_inode; 62 inode = dentry->d_inode;
63 osb = OCFS2_SB(dentry->d_sb); 63 osb = OCFS2_SB(dentry->d_sb);
64 64
65 mlog_entry("(0x%p, '%.*s')\n", dentry, 65 trace_ocfs2_dentry_revalidate(dentry, dentry->d_name.len,
66 dentry->d_name.len, dentry->d_name.name); 66 dentry->d_name.name);
67 67
68 /* For a negative dentry - 68 /* For a negative dentry -
69 * check the generation number of the parent and compare with the 69 * check the generation number of the parent and compare with the
@@ -73,9 +73,10 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
73 unsigned long gen = (unsigned long) dentry->d_fsdata; 73 unsigned long gen = (unsigned long) dentry->d_fsdata;
74 unsigned long pgen = 74 unsigned long pgen =
75 OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen; 75 OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen;
76 mlog(0, "negative dentry: %.*s parent gen: %lu " 76
77 "dentry gen: %lu\n", 77 trace_ocfs2_dentry_revalidate_negative(dentry->d_name.len,
78 dentry->d_name.len, dentry->d_name.name, pgen, gen); 78 dentry->d_name.name,
79 pgen, gen);
79 if (gen != pgen) 80 if (gen != pgen)
80 goto bail; 81 goto bail;
81 goto valid; 82 goto valid;
@@ -90,8 +91,8 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
90 /* did we or someone else delete this inode? */ 91 /* did we or someone else delete this inode? */
91 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) { 92 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
92 spin_unlock(&OCFS2_I(inode)->ip_lock); 93 spin_unlock(&OCFS2_I(inode)->ip_lock);
93 mlog(0, "inode (%llu) deleted, returning false\n", 94 trace_ocfs2_dentry_revalidate_delete(
94 (unsigned long long)OCFS2_I(inode)->ip_blkno); 95 (unsigned long long)OCFS2_I(inode)->ip_blkno);
95 goto bail; 96 goto bail;
96 } 97 }
97 spin_unlock(&OCFS2_I(inode)->ip_lock); 98 spin_unlock(&OCFS2_I(inode)->ip_lock);
@@ -101,10 +102,9 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
101 * inode nlink hits zero, it never goes back. 102 * inode nlink hits zero, it never goes back.
102 */ 103 */
103 if (inode->i_nlink == 0) { 104 if (inode->i_nlink == 0) {
104 mlog(0, "Inode %llu orphaned, returning false " 105 trace_ocfs2_dentry_revalidate_orphaned(
105 "dir = %d\n", 106 (unsigned long long)OCFS2_I(inode)->ip_blkno,
106 (unsigned long long)OCFS2_I(inode)->ip_blkno, 107 S_ISDIR(inode->i_mode));
107 S_ISDIR(inode->i_mode));
108 goto bail; 108 goto bail;
109 } 109 }
110 110
@@ -113,9 +113,8 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
113 * redo it. 113 * redo it.
114 */ 114 */
115 if (!dentry->d_fsdata) { 115 if (!dentry->d_fsdata) {
116 mlog(0, "Inode %llu doesn't have dentry lock, " 116 trace_ocfs2_dentry_revalidate_nofsdata(
117 "returning false\n", 117 (unsigned long long)OCFS2_I(inode)->ip_blkno);
118 (unsigned long long)OCFS2_I(inode)->ip_blkno);
119 goto bail; 118 goto bail;
120 } 119 }
121 120
@@ -123,8 +122,7 @@ valid:
123 ret = 1; 122 ret = 1;
124 123
125bail: 124bail:
126 mlog_exit(ret); 125 trace_ocfs2_dentry_revalidate_ret(ret);
127
128 return ret; 126 return ret;
129} 127}
130 128
@@ -181,8 +179,8 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode,
181 179
182 spin_lock(&dentry->d_lock); 180 spin_lock(&dentry->d_lock);
183 if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { 181 if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
184 mlog(0, "dentry found: %.*s\n", 182 trace_ocfs2_find_local_alias(dentry->d_name.len,
185 dentry->d_name.len, dentry->d_name.name); 183 dentry->d_name.name);
186 184
187 dget_dlock(dentry); 185 dget_dlock(dentry);
188 spin_unlock(&dentry->d_lock); 186 spin_unlock(&dentry->d_lock);
@@ -240,9 +238,8 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry,
240 struct dentry *alias; 238 struct dentry *alias;
241 struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 239 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
242 240
243 mlog(0, "Attach \"%.*s\", parent %llu, fsdata: %p\n", 241 trace_ocfs2_dentry_attach_lock(dentry->d_name.len, dentry->d_name.name,
244 dentry->d_name.len, dentry->d_name.name, 242 (unsigned long long)parent_blkno, dl);
245 (unsigned long long)parent_blkno, dl);
246 243
247 /* 244 /*
248 * Negative dentry. We ignore these for now. 245 * Negative dentry. We ignore these for now.
@@ -292,7 +289,9 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry,
292 (unsigned long long)parent_blkno, 289 (unsigned long long)parent_blkno,
293 (unsigned long long)dl->dl_parent_blkno); 290 (unsigned long long)dl->dl_parent_blkno);
294 291
295 mlog(0, "Found: %s\n", dl->dl_lockres.l_name); 292 trace_ocfs2_dentry_attach_lock_found(dl->dl_lockres.l_name,
293 (unsigned long long)parent_blkno,
294 (unsigned long long)OCFS2_I(inode)->ip_blkno);
296 295
297 goto out_attach; 296 goto out_attach;
298 } 297 }
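The dcache events capture dentry names, which a tracepoint must copy into the ring buffer rather than keep a pointer to, since the dentry can be freed before the buffer is read. A sketch of how such an event would handle the variable-length string with the standard __string()/__assign_str() helpers (layout illustrative, not copied from ocfs2_trace.h):

TRACE_EVENT(ocfs2_find_local_alias,
	TP_PROTO(int len, const char *name),
	TP_ARGS(len, name),
	TP_STRUCT__entry(
		__field(int, len)
		__string(name, name)
	),
	TP_fast_assign(
		__entry->len = len;
		__assign_str(name, name);
	),
	TP_printk("%.*s", __entry->len, __get_str(name))
);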
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index f97b6f1c61dd..9fe5b8fd658f 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -43,7 +43,6 @@
43#include <linux/quotaops.h> 43#include <linux/quotaops.h>
44#include <linux/sort.h> 44#include <linux/sort.h>
45 45
46#define MLOG_MASK_PREFIX ML_NAMEI
47#include <cluster/masklog.h> 46#include <cluster/masklog.h>
48 47
49#include "ocfs2.h" 48#include "ocfs2.h"
@@ -61,6 +60,7 @@
61#include "super.h" 60#include "super.h"
62#include "sysfile.h" 61#include "sysfile.h"
63#include "uptodate.h" 62#include "uptodate.h"
63#include "ocfs2_trace.h"
64 64
65#include "buffer_head_io.h" 65#include "buffer_head_io.h"
66 66
@@ -322,21 +322,23 @@ static int ocfs2_check_dir_entry(struct inode * dir,
322 const char *error_msg = NULL; 322 const char *error_msg = NULL;
323 const int rlen = le16_to_cpu(de->rec_len); 323 const int rlen = le16_to_cpu(de->rec_len);
324 324
325 if (rlen < OCFS2_DIR_REC_LEN(1)) 325 if (unlikely(rlen < OCFS2_DIR_REC_LEN(1)))
326 error_msg = "rec_len is smaller than minimal"; 326 error_msg = "rec_len is smaller than minimal";
327 else if (rlen % 4 != 0) 327 else if (unlikely(rlen % 4 != 0))
328 error_msg = "rec_len % 4 != 0"; 328 error_msg = "rec_len % 4 != 0";
329 else if (rlen < OCFS2_DIR_REC_LEN(de->name_len)) 329 else if (unlikely(rlen < OCFS2_DIR_REC_LEN(de->name_len)))
330 error_msg = "rec_len is too small for name_len"; 330 error_msg = "rec_len is too small for name_len";
331 else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize) 331 else if (unlikely(
332 ((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize))
332 error_msg = "directory entry across blocks"; 333 error_msg = "directory entry across blocks";
333 334
334 if (error_msg != NULL) 335 if (unlikely(error_msg != NULL))
335 mlog(ML_ERROR, "bad entry in directory #%llu: %s - " 336 mlog(ML_ERROR, "bad entry in directory #%llu: %s - "
336 "offset=%lu, inode=%llu, rec_len=%d, name_len=%d\n", 337 "offset=%lu, inode=%llu, rec_len=%d, name_len=%d\n",
337 (unsigned long long)OCFS2_I(dir)->ip_blkno, error_msg, 338 (unsigned long long)OCFS2_I(dir)->ip_blkno, error_msg,
338 offset, (unsigned long long)le64_to_cpu(de->inode), rlen, 339 offset, (unsigned long long)le64_to_cpu(de->inode), rlen,
339 de->name_len); 340 de->name_len);
341
340 return error_msg == NULL ? 1 : 0; 342 return error_msg == NULL ? 1 : 0;
341} 343}
342 344
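The unlikely() annotations added to ocfs2_check_dir_entry() above are branch-prediction hints; per include/linux/compiler.h they boil down to __builtin_expect(), steering gcc to lay the corruption-handling paths out of the hot line of execution:

/* From include/linux/compiler.h: the compiler is told the test is
 * expected true (likely) or false (unlikely) and orders code to match. */
#define likely(x)	__builtin_expect(!!(x), 1)
#define unlikely(x)	__builtin_expect(!!(x), 0)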
@@ -367,8 +369,6 @@ static inline int ocfs2_search_dirblock(struct buffer_head *bh,
367 int de_len; 369 int de_len;
368 int ret = 0; 370 int ret = 0;
369 371
370 mlog_entry_void();
371
372 de_buf = first_de; 372 de_buf = first_de;
373 dlimit = de_buf + bytes; 373 dlimit = de_buf + bytes;
374 374
@@ -402,7 +402,7 @@ static inline int ocfs2_search_dirblock(struct buffer_head *bh,
402 } 402 }
403 403
404bail: 404bail:
405 mlog_exit(ret); 405 trace_ocfs2_search_dirblock(ret);
406 return ret; 406 return ret;
407} 407}
408 408
@@ -447,8 +447,7 @@ static int ocfs2_validate_dir_block(struct super_block *sb,
447 * We don't validate dirents here, that's handled 447 * We don't validate dirents here, that's handled
448 * in-place when the code walks them. 448 * in-place when the code walks them.
449 */ 449 */
450 mlog(0, "Validating dirblock %llu\n", 450 trace_ocfs2_validate_dir_block((unsigned long long)bh->b_blocknr);
451 (unsigned long long)bh->b_blocknr);
452 451
453 BUG_ON(!buffer_uptodate(bh)); 452 BUG_ON(!buffer_uptodate(bh));
454 453
@@ -706,8 +705,6 @@ static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen,
706 int num = 0; 705 int num = 0;
707 int nblocks, i, err; 706 int nblocks, i, err;
708 707
709 mlog_entry_void();
710
711 sb = dir->i_sb; 708 sb = dir->i_sb;
712 709
713 nblocks = i_size_read(dir) >> sb->s_blocksize_bits; 710 nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
@@ -788,7 +785,7 @@ cleanup_and_exit:
788 for (; ra_ptr < ra_max; ra_ptr++) 785 for (; ra_ptr < ra_max; ra_ptr++)
789 brelse(bh_use[ra_ptr]); 786 brelse(bh_use[ra_ptr]);
790 787
791 mlog_exit_ptr(ret); 788 trace_ocfs2_find_entry_el(ret);
792 return ret; 789 return ret;
793} 790}
794 791
@@ -950,11 +947,9 @@ static int ocfs2_dx_dir_search(const char *name, int namelen,
950 goto out; 947 goto out;
951 } 948 }
952 949
953 mlog(0, "Dir %llu: name: \"%.*s\", lookup of hash: %u.0x%x " 950 trace_ocfs2_dx_dir_search((unsigned long long)OCFS2_I(dir)->ip_blkno,
954 "returns: %llu\n", 951 namelen, name, hinfo->major_hash,
955 (unsigned long long)OCFS2_I(dir)->ip_blkno, 952 hinfo->minor_hash, (unsigned long long)phys);
956 namelen, name, hinfo->major_hash, hinfo->minor_hash,
957 (unsigned long long)phys);
958 953
959 ret = ocfs2_read_dx_leaf(dir, phys, &dx_leaf_bh); 954 ret = ocfs2_read_dx_leaf(dir, phys, &dx_leaf_bh);
960 if (ret) { 955 if (ret) {
@@ -964,9 +959,9 @@ static int ocfs2_dx_dir_search(const char *name, int namelen,
964 959
965 dx_leaf = (struct ocfs2_dx_leaf *) dx_leaf_bh->b_data; 960 dx_leaf = (struct ocfs2_dx_leaf *) dx_leaf_bh->b_data;
966 961
967 mlog(0, "leaf info: num_used: %d, count: %d\n", 962 trace_ocfs2_dx_dir_search_leaf_info(
968 le16_to_cpu(dx_leaf->dl_list.de_num_used), 963 le16_to_cpu(dx_leaf->dl_list.de_num_used),
969 le16_to_cpu(dx_leaf->dl_list.de_count)); 964 le16_to_cpu(dx_leaf->dl_list.de_count));
970 965
971 entry_list = &dx_leaf->dl_list; 966 entry_list = &dx_leaf->dl_list;
972 967
@@ -1166,8 +1161,6 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
1166 int i, status = -ENOENT; 1161 int i, status = -ENOENT;
1167 ocfs2_journal_access_func access = ocfs2_journal_access_db; 1162 ocfs2_journal_access_func access = ocfs2_journal_access_db;
1168 1163
1169 mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p)\n", handle, dir, de_del, bh);
1170
1171 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) 1164 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
1172 access = ocfs2_journal_access_di; 1165 access = ocfs2_journal_access_di;
1173 1166
@@ -1202,7 +1195,6 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
1202 de = (struct ocfs2_dir_entry *)((char *)de + le16_to_cpu(de->rec_len)); 1195 de = (struct ocfs2_dir_entry *)((char *)de + le16_to_cpu(de->rec_len));
1203 } 1196 }
1204bail: 1197bail:
1205 mlog_exit(status);
1206 return status; 1198 return status;
1207} 1199}
1208 1200
@@ -1348,8 +1340,8 @@ static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir,
1348 } 1340 }
1349 } 1341 }
1350 1342
1351 mlog(0, "Dir %llu: delete entry at index: %d\n", 1343 trace_ocfs2_delete_entry_dx((unsigned long long)OCFS2_I(dir)->ip_blkno,
1352 (unsigned long long)OCFS2_I(dir)->ip_blkno, index); 1344 index);
1353 1345
1354 ret = __ocfs2_delete_entry(handle, dir, lookup->dl_entry, 1346 ret = __ocfs2_delete_entry(handle, dir, lookup->dl_entry,
1355 leaf_bh, leaf_bh->b_data, leaf_bh->b_size); 1347 leaf_bh, leaf_bh->b_data, leaf_bh->b_size);
@@ -1632,8 +1624,6 @@ int __ocfs2_add_entry(handle_t *handle,
1632 struct buffer_head *insert_bh = lookup->dl_leaf_bh; 1624 struct buffer_head *insert_bh = lookup->dl_leaf_bh;
1633 char *data_start = insert_bh->b_data; 1625 char *data_start = insert_bh->b_data;
1634 1626
1635 mlog_entry_void();
1636
1637 if (!namelen) 1627 if (!namelen)
1638 return -EINVAL; 1628 return -EINVAL;
1639 1629
@@ -1765,8 +1755,9 @@ int __ocfs2_add_entry(handle_t *handle,
1765 * from ever getting here. */ 1755 * from ever getting here. */
1766 retval = -ENOSPC; 1756 retval = -ENOSPC;
1767bail: 1757bail:
1758 if (retval)
1759 mlog_errno(retval);
1768 1760
1769 mlog_exit(retval);
1770 return retval; 1761 return retval;
1771} 1762}
1772 1763
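Several exit paths in this diff swap an unconditional mlog_exit() for a guarded mlog_errno(), which reports only actual failures. The real macro lives in cluster/masklog.h and filters benign codes; the following is a simplified sketch, not the committed definition:

/* Simplified sketch of mlog_errno(); the real macro in cluster/masklog.h
 * also ignores codes such as -EINTR and AOP_TRUNCATED_PAGE. */
#define mlog_errno(st) do {					\
	int _st = (st);						\
	if (_st != -ERESTARTSYS)				\
		mlog(ML_ERROR, "status = %d\n", _st);		\
} while (0)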
@@ -2028,8 +2019,7 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
2028 struct inode *inode = filp->f_path.dentry->d_inode; 2019 struct inode *inode = filp->f_path.dentry->d_inode;
2029 int lock_level = 0; 2020 int lock_level = 0;
2030 2021
2031 mlog_entry("dirino=%llu\n", 2022 trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno);
2032 (unsigned long long)OCFS2_I(inode)->ip_blkno);
2033 2023
2034 error = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level); 2024 error = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level);
2035 if (lock_level && error >= 0) { 2025 if (lock_level && error >= 0) {
@@ -2051,9 +2041,10 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
2051 dirent, filldir, NULL); 2041 dirent, filldir, NULL);
2052 2042
2053 ocfs2_inode_unlock(inode, lock_level); 2043 ocfs2_inode_unlock(inode, lock_level);
2044 if (error)
2045 mlog_errno(error);
2054 2046
2055bail_nolock: 2047bail_nolock:
2056 mlog_exit(error);
2057 2048
2058 return error; 2049 return error;
2059} 2050}
@@ -2069,8 +2060,8 @@ int ocfs2_find_files_on_disk(const char *name,
2069{ 2060{
2070 int status = -ENOENT; 2061 int status = -ENOENT;
2071 2062
2072 mlog(0, "name=%.*s, blkno=%p, inode=%llu\n", namelen, name, blkno, 2063 trace_ocfs2_find_files_on_disk(namelen, name, blkno,
2073 (unsigned long long)OCFS2_I(inode)->ip_blkno); 2064 (unsigned long long)OCFS2_I(inode)->ip_blkno);
2074 2065
2075 status = ocfs2_find_entry(name, namelen, inode, lookup); 2066 status = ocfs2_find_entry(name, namelen, inode, lookup);
2076 if (status) 2067 if (status)
@@ -2114,8 +2105,8 @@ int ocfs2_check_dir_for_entry(struct inode *dir,
2114 int ret; 2105 int ret;
2115 struct ocfs2_dir_lookup_result lookup = { NULL, }; 2106 struct ocfs2_dir_lookup_result lookup = { NULL, };
2116 2107
2117 mlog_entry("dir %llu, name '%.*s'\n", 2108 trace_ocfs2_check_dir_for_entry(
2118 (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name); 2109 (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name);
2119 2110
2120 ret = -EEXIST; 2111 ret = -EEXIST;
2121 if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) 2112 if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0)
@@ -2125,7 +2116,8 @@ int ocfs2_check_dir_for_entry(struct inode *dir,
2125bail: 2116bail:
2126 ocfs2_free_dir_lookup_result(&lookup); 2117 ocfs2_free_dir_lookup_result(&lookup);
2127 2118
2128 mlog_exit(ret); 2119 if (ret)
2120 mlog_errno(ret);
2129 return ret; 2121 return ret;
2130} 2122}
2131 2123
@@ -2324,8 +2316,6 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
2324 struct buffer_head *new_bh = NULL; 2316 struct buffer_head *new_bh = NULL;
2325 struct ocfs2_dir_entry *de; 2317 struct ocfs2_dir_entry *de;
2326 2318
2327 mlog_entry_void();
2328
2329 if (ocfs2_new_dir_wants_trailer(inode)) 2319 if (ocfs2_new_dir_wants_trailer(inode))
2330 size = ocfs2_dir_trailer_blk_off(parent->i_sb); 2320 size = ocfs2_dir_trailer_blk_off(parent->i_sb);
2331 2321
@@ -2380,7 +2370,6 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
2380bail: 2370bail:
2381 brelse(new_bh); 2371 brelse(new_bh);
2382 2372
2383 mlog_exit(status);
2384 return status; 2373 return status;
2385} 2374}
2386 2375
@@ -2409,9 +2398,9 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
2409 goto out; 2398 goto out;
2410 } 2399 }
2411 2400
2412 mlog(0, "Dir %llu, attach new index block: %llu\n", 2401 trace_ocfs2_dx_dir_attach_index(
2413 (unsigned long long)OCFS2_I(dir)->ip_blkno, 2402 (unsigned long long)OCFS2_I(dir)->ip_blkno,
2414 (unsigned long long)dr_blkno); 2403 (unsigned long long)dr_blkno);
2415 2404
2416 dx_root_bh = sb_getblk(osb->sb, dr_blkno); 2405 dx_root_bh = sb_getblk(osb->sb, dr_blkno);
2417 if (dx_root_bh == NULL) { 2406 if (dx_root_bh == NULL) {
@@ -2511,11 +2500,10 @@ static int ocfs2_dx_dir_format_cluster(struct ocfs2_super *osb,
2511 dx_leaf->dl_list.de_count = 2500 dx_leaf->dl_list.de_count =
2512 cpu_to_le16(ocfs2_dx_entries_per_leaf(osb->sb)); 2501 cpu_to_le16(ocfs2_dx_entries_per_leaf(osb->sb));
2513 2502
2514 mlog(0, 2503 trace_ocfs2_dx_dir_format_cluster(
2515 "Dir %llu, format dx_leaf: %llu, entry count: %u\n", 2504 (unsigned long long)OCFS2_I(dir)->ip_blkno,
2516 (unsigned long long)OCFS2_I(dir)->ip_blkno, 2505 (unsigned long long)bh->b_blocknr,
2517 (unsigned long long)bh->b_blocknr, 2506 le16_to_cpu(dx_leaf->dl_list.de_count));
2518 le16_to_cpu(dx_leaf->dl_list.de_count));
2519 2507
2520 ocfs2_journal_dirty(handle, bh); 2508 ocfs2_journal_dirty(handle, bh);
2521 } 2509 }
@@ -2759,12 +2747,11 @@ static void ocfs2_dx_dir_index_root_block(struct inode *dir,
2759 2747
2760 ocfs2_dx_dir_name_hash(dir, de->name, de->name_len, &hinfo); 2748 ocfs2_dx_dir_name_hash(dir, de->name, de->name_len, &hinfo);
2761 2749
2762 mlog(0, 2750 trace_ocfs2_dx_dir_index_root_block(
2763 "dir: %llu, major: 0x%x minor: 0x%x, index: %u, name: %.*s\n", 2751 (unsigned long long)dir->i_ino,
2764 (unsigned long long)dir->i_ino, hinfo.major_hash, 2752 hinfo.major_hash, hinfo.minor_hash,
2765 hinfo.minor_hash, 2753 de->name_len, de->name,
2766 le16_to_cpu(dx_root->dr_entries.de_num_used), 2754 le16_to_cpu(dx_root->dr_entries.de_num_used));
2767 de->name_len, de->name);
2768 2755
2769 ocfs2_dx_entry_list_insert(&dx_root->dr_entries, &hinfo, 2756 ocfs2_dx_entry_list_insert(&dx_root->dr_entries, &hinfo,
2770 dirent_blk); 2757 dirent_blk);
@@ -3235,7 +3222,6 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
3235bail: 3222bail:
3236 if (did_quota && status < 0) 3223 if (did_quota && status < 0)
3237 dquot_free_space_nodirty(dir, ocfs2_clusters_to_bytes(sb, 1)); 3224 dquot_free_space_nodirty(dir, ocfs2_clusters_to_bytes(sb, 1));
3238 mlog_exit(status);
3239 return status; 3225 return status;
3240} 3226}
3241 3227
@@ -3270,8 +3256,6 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
3270 struct ocfs2_extent_tree et; 3256 struct ocfs2_extent_tree et;
3271 struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh; 3257 struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh;
3272 3258
3273 mlog_entry_void();
3274
3275 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 3259 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
3276 /* 3260 /*
3277 * This would be a code error as an inline directory should 3261 * This would be a code error as an inline directory should
@@ -3320,8 +3304,8 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
3320 down_write(&OCFS2_I(dir)->ip_alloc_sem); 3304 down_write(&OCFS2_I(dir)->ip_alloc_sem);
3321 drop_alloc_sem = 1; 3305 drop_alloc_sem = 1;
3322 dir_i_size = i_size_read(dir); 3306 dir_i_size = i_size_read(dir);
3323 mlog(0, "extending dir %llu (i_size = %lld)\n", 3307 trace_ocfs2_extend_dir((unsigned long long)OCFS2_I(dir)->ip_blkno,
3324 (unsigned long long)OCFS2_I(dir)->ip_blkno, dir_i_size); 3308 dir_i_size);
3325 3309
3326 /* dir->i_size is always block aligned. */ 3310 /* dir->i_size is always block aligned. */
3327 spin_lock(&OCFS2_I(dir)->ip_lock); 3311 spin_lock(&OCFS2_I(dir)->ip_lock);
@@ -3436,7 +3420,6 @@ bail:
3436 3420
3437 brelse(new_bh); 3421 brelse(new_bh);
3438 3422
3439 mlog_exit(status);
3440 return status; 3423 return status;
3441} 3424}
3442 3425
@@ -3583,8 +3566,9 @@ next:
3583 status = 0; 3566 status = 0;
3584bail: 3567bail:
3585 brelse(bh); 3568 brelse(bh);
3569 if (status)
3570 mlog_errno(status);
3586 3571
3587 mlog_exit(status);
3588 return status; 3572 return status;
3589} 3573}
3590 3574
@@ -3815,9 +3799,9 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
3815 struct ocfs2_dx_root_block *dx_root; 3799 struct ocfs2_dx_root_block *dx_root;
3816 struct ocfs2_dx_leaf *tmp_dx_leaf = NULL; 3800 struct ocfs2_dx_leaf *tmp_dx_leaf = NULL;
3817 3801
3818 mlog(0, "DX Dir: %llu, rebalance leaf leaf_blkno: %llu insert: %u\n", 3802 trace_ocfs2_dx_dir_rebalance((unsigned long long)OCFS2_I(dir)->ip_blkno,
3819 (unsigned long long)OCFS2_I(dir)->ip_blkno, 3803 (unsigned long long)leaf_blkno,
3820 (unsigned long long)leaf_blkno, insert_hash); 3804 insert_hash);
3821 3805
3822 ocfs2_init_dx_root_extent_tree(&et, INODE_CACHE(dir), dx_root_bh); 3806 ocfs2_init_dx_root_extent_tree(&et, INODE_CACHE(dir), dx_root_bh);
3823 3807
@@ -3897,8 +3881,7 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
3897 goto out_commit; 3881 goto out_commit;
3898 } 3882 }
3899 3883
3900 mlog(0, "Split leaf (%u) at %u, insert major hash is %u\n", 3884 trace_ocfs2_dx_dir_rebalance_split(leaf_cpos, split_hash, insert_hash);
3901 leaf_cpos, split_hash, insert_hash);
3902 3885
3903 /* 3886 /*
3904 * We have to carefully order operations here. There are items 3887 * We have to carefully order operations here. There are items
@@ -4355,8 +4338,8 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
4355 unsigned int blocks_wanted = 1; 4338 unsigned int blocks_wanted = 1;
4356 struct buffer_head *bh = NULL; 4339 struct buffer_head *bh = NULL;
4357 4340
4358 mlog(0, "getting ready to insert namelen %d into dir %llu\n", 4341 trace_ocfs2_prepare_dir_for_insert(
4359 namelen, (unsigned long long)OCFS2_I(dir)->ip_blkno); 4342 (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen);
4360 4343
4361 if (!namelen) { 4344 if (!namelen) {
4362 ret = -EINVAL; 4345 ret = -EINVAL;
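The trace_ocfs2_*() calls introduced throughout this diff are generated from tracepoint definitions in the new fs/ocfs2/ocfs2_trace.h, which is not shown here. The committed header groups events into shared event classes, so the following is only an illustrative sketch of what a standalone definition backing trace_ocfs2_readdir() could look like:

#undef TRACE_SYSTEM
#define TRACE_SYSTEM ocfs2

#if !defined(_TRACE_OCFS2_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_OCFS2_H

#include <linux/tracepoint.h>

TRACE_EVENT(ocfs2_readdir,
	/* one argument: the directory inode's block number */
	TP_PROTO(unsigned long long dirino),
	TP_ARGS(dirino),
	TP_STRUCT__entry(
		__field(unsigned long long, dirino)
	),
	TP_fast_assign(
		__entry->dirino = dirino;
	),
	TP_printk("dirino %llu", __entry->dirino)
);

#endif /* _TRACE_OCFS2_H */

/* This part must be outside the multi-read protection */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE ocfs2_trace
#include <trace/define_trace.h>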
diff --git a/fs/ocfs2/dlm/Makefile b/fs/ocfs2/dlm/Makefile
index dcebf0d920fa..c8a044efbb15 100644
--- a/fs/ocfs2/dlm/Makefile
+++ b/fs/ocfs2/dlm/Makefile
@@ -1,4 +1,4 @@
1EXTRA_CFLAGS += -Ifs/ocfs2 1ccflags-y := -Ifs/ocfs2
2 2
3obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o 3obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o
4 4
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index 9f30491e5e88..29a886d1e82c 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -128,8 +128,8 @@ static enum dlm_status __dlmconvert_master(struct dlm_ctxt *dlm,
128 128
129 assert_spin_locked(&res->spinlock); 129 assert_spin_locked(&res->spinlock);
130 130
131 mlog_entry("type=%d, convert_type=%d, new convert_type=%d\n", 131 mlog(0, "type=%d, convert_type=%d, new convert_type=%d\n",
132 lock->ml.type, lock->ml.convert_type, type); 132 lock->ml.type, lock->ml.convert_type, type);
133 133
134 spin_lock(&lock->spinlock); 134 spin_lock(&lock->spinlock);
135 135
@@ -353,7 +353,7 @@ static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm,
353 struct kvec vec[2]; 353 struct kvec vec[2];
354 size_t veclen = 1; 354 size_t veclen = 1;
355 355
356 mlog_entry("%.*s\n", res->lockname.len, res->lockname.name); 356 mlog(0, "%.*s\n", res->lockname.len, res->lockname.name);
357 357
358 memset(&convert, 0, sizeof(struct dlm_convert_lock)); 358 memset(&convert, 0, sizeof(struct dlm_convert_lock));
359 convert.node_idx = dlm->node_num; 359 convert.node_idx = dlm->node_num;
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 7e38a072d720..7540a492eaba 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -188,7 +188,7 @@ struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm,
188 struct hlist_head *bucket; 188 struct hlist_head *bucket;
189 struct hlist_node *list; 189 struct hlist_node *list;
190 190
191 mlog_entry("%.*s\n", len, name); 191 mlog(0, "%.*s\n", len, name);
192 192
193 assert_spin_locked(&dlm->spinlock); 193 assert_spin_locked(&dlm->spinlock);
194 194
@@ -222,7 +222,7 @@ struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm,
222{ 222{
223 struct dlm_lock_resource *res = NULL; 223 struct dlm_lock_resource *res = NULL;
224 224
225 mlog_entry("%.*s\n", len, name); 225 mlog(0, "%.*s\n", len, name);
226 226
227 assert_spin_locked(&dlm->spinlock); 227 assert_spin_locked(&dlm->spinlock);
228 228
@@ -531,7 +531,7 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data,
531 unsigned int node; 531 unsigned int node;
532 struct dlm_exit_domain *exit_msg = (struct dlm_exit_domain *) msg->buf; 532 struct dlm_exit_domain *exit_msg = (struct dlm_exit_domain *) msg->buf;
533 533
534 mlog_entry("%p %u %p", msg, len, data); 534 mlog(0, "%p %u %p", msg, len, data);
535 535
536 if (!dlm_grab(dlm)) 536 if (!dlm_grab(dlm))
537 return 0; 537 return 0;
@@ -926,9 +926,10 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
926} 926}
927 927
928static int dlm_match_regions(struct dlm_ctxt *dlm, 928static int dlm_match_regions(struct dlm_ctxt *dlm,
929 struct dlm_query_region *qr) 929 struct dlm_query_region *qr,
930 char *local, int locallen)
930{ 931{
931 char *local = NULL, *remote = qr->qr_regions; 932 char *remote = qr->qr_regions;
932 char *l, *r; 933 char *l, *r;
933 int localnr, i, j, foundit; 934 int localnr, i, j, foundit;
934 int status = 0; 935 int status = 0;
@@ -957,13 +958,8 @@ static int dlm_match_regions(struct dlm_ctxt *dlm,
957 r += O2HB_MAX_REGION_NAME_LEN; 958 r += O2HB_MAX_REGION_NAME_LEN;
958 } 959 }
959 960
960 local = kmalloc(sizeof(qr->qr_regions), GFP_ATOMIC); 961 localnr = min(O2NM_MAX_REGIONS, locallen/O2HB_MAX_REGION_NAME_LEN);
961 if (!local) { 962 localnr = o2hb_get_all_regions(local, (u8)localnr);
962 status = -ENOMEM;
963 goto bail;
964 }
965
966 localnr = o2hb_get_all_regions(local, O2NM_MAX_REGIONS);
967 963
968 /* compare local regions with remote */ 964 /* compare local regions with remote */
969 l = local; 965 l = local;
@@ -1012,8 +1008,6 @@ static int dlm_match_regions(struct dlm_ctxt *dlm,
1012 } 1008 }
1013 1009
1014bail: 1010bail:
1015 kfree(local);
1016
1017 return status; 1011 return status;
1018} 1012}
1019 1013
@@ -1075,6 +1069,7 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
1075{ 1069{
1076 struct dlm_query_region *qr; 1070 struct dlm_query_region *qr;
1077 struct dlm_ctxt *dlm = NULL; 1071 struct dlm_ctxt *dlm = NULL;
1072 char *local = NULL;
1078 int status = 0; 1073 int status = 0;
1079 int locked = 0; 1074 int locked = 0;
1080 1075
@@ -1083,6 +1078,13 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
1083 mlog(0, "Node %u queries hb regions on domain %s\n", qr->qr_node, 1078 mlog(0, "Node %u queries hb regions on domain %s\n", qr->qr_node,
1084 qr->qr_domain); 1079 qr->qr_domain);
1085 1080
 1081 /* buffer used in dlm_match_regions() */

1082 local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL);
1083 if (!local) {
1084 status = -ENOMEM;
1085 goto bail;
1086 }
1087
1086 status = -EINVAL; 1088 status = -EINVAL;
1087 1089
1088 spin_lock(&dlm_domain_lock); 1090 spin_lock(&dlm_domain_lock);
@@ -1112,13 +1114,15 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
1112 goto bail; 1114 goto bail;
1113 } 1115 }
1114 1116
1115 status = dlm_match_regions(dlm, qr); 1117 status = dlm_match_regions(dlm, qr, local, sizeof(qr->qr_regions));
1116 1118
1117bail: 1119bail:
1118 if (locked) 1120 if (locked)
1119 spin_unlock(&dlm->spinlock); 1121 spin_unlock(&dlm->spinlock);
1120 spin_unlock(&dlm_domain_lock); 1122 spin_unlock(&dlm_domain_lock);
1121 1123
1124 kfree(local);
1125
1122 return status; 1126 return status;
1123} 1127}
1124 1128
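The dlm_match_regions() rework above is a locking fix as much as a cleanup: the old code called kmalloc(GFP_ATOMIC) while dlm_domain_lock and dlm->spinlock were held, whereas the handler now allocates the scratch buffer with GFP_KERNEL before taking any locks and passes it down. A minimal sketch of the pattern, with hypothetical names (demo_lock, demo_match, demo_query_handler):

#include <linux/slab.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);

/* stand-in for dlm_match_regions(): consumes the buffer under the lock */
static int demo_match(char *local, size_t locallen)
{
	return 0;
}

static int demo_query_handler(size_t size)
{
	char *local;
	int status;

	local = kmalloc(size, GFP_KERNEL);	/* may sleep: no locks held yet */
	if (!local)
		return -ENOMEM;

	spin_lock(&demo_lock);			/* atomic context from here on */
	status = demo_match(local, size);	/* must not allocate */
	spin_unlock(&demo_lock);

	kfree(local);
	return status;
}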
@@ -1553,7 +1557,7 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm)
1553 struct domain_join_ctxt *ctxt; 1557 struct domain_join_ctxt *ctxt;
1554 enum dlm_query_join_response_code response = JOIN_DISALLOW; 1558 enum dlm_query_join_response_code response = JOIN_DISALLOW;
1555 1559
1556 mlog_entry("%p", dlm); 1560 mlog(0, "%p", dlm);
1557 1561
1558 ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); 1562 ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
1559 if (!ctxt) { 1563 if (!ctxt) {
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 7009292aac5a..8d39e0fd66f7 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -128,7 +128,7 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm,
128 int call_ast = 0, kick_thread = 0; 128 int call_ast = 0, kick_thread = 0;
129 enum dlm_status status = DLM_NORMAL; 129 enum dlm_status status = DLM_NORMAL;
130 130
131 mlog_entry("type=%d\n", lock->ml.type); 131 mlog(0, "type=%d\n", lock->ml.type);
132 132
133 spin_lock(&res->spinlock); 133 spin_lock(&res->spinlock);
134 /* if called from dlm_create_lock_handler, need to 134 /* if called from dlm_create_lock_handler, need to
@@ -227,8 +227,8 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
227 enum dlm_status status = DLM_DENIED; 227 enum dlm_status status = DLM_DENIED;
228 int lockres_changed = 1; 228 int lockres_changed = 1;
229 229
230 mlog_entry("type=%d\n", lock->ml.type); 230 mlog(0, "type=%d, lockres %.*s, flags = 0x%x\n",
231 mlog(0, "lockres %.*s, flags = 0x%x\n", res->lockname.len, 231 lock->ml.type, res->lockname.len,
232 res->lockname.name, flags); 232 res->lockname.name, flags);
233 233
234 spin_lock(&res->spinlock); 234 spin_lock(&res->spinlock);
@@ -308,8 +308,6 @@ static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm,
308 int tmpret, status = 0; 308 int tmpret, status = 0;
309 enum dlm_status ret; 309 enum dlm_status ret;
310 310
311 mlog_entry_void();
312
313 memset(&create, 0, sizeof(create)); 311 memset(&create, 0, sizeof(create));
314 create.node_idx = dlm->node_num; 312 create.node_idx = dlm->node_num;
315 create.requested_type = lock->ml.type; 313 create.requested_type = lock->ml.type;
@@ -477,8 +475,6 @@ int dlm_create_lock_handler(struct o2net_msg *msg, u32 len, void *data,
477 475
478 BUG_ON(!dlm); 476 BUG_ON(!dlm);
479 477
480 mlog_entry_void();
481
482 if (!dlm_grab(dlm)) 478 if (!dlm_grab(dlm))
483 return DLM_REJECTED; 479 return DLM_REJECTED;
484 480
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 59f0f6bdfc62..fede57ed005f 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -426,8 +426,6 @@ static void dlm_mle_release(struct kref *kref)
426 struct dlm_master_list_entry *mle; 426 struct dlm_master_list_entry *mle;
427 struct dlm_ctxt *dlm; 427 struct dlm_ctxt *dlm;
428 428
429 mlog_entry_void();
430
431 mle = container_of(kref, struct dlm_master_list_entry, mle_refs); 429 mle = container_of(kref, struct dlm_master_list_entry, mle_refs);
432 dlm = mle->dlm; 430 dlm = mle->dlm;
433 431
@@ -810,7 +808,7 @@ lookup:
810 dlm_mle_detach_hb_events(dlm, mle); 808 dlm_mle_detach_hb_events(dlm, mle);
811 dlm_put_mle(mle); 809 dlm_put_mle(mle);
812 mle = NULL; 810 mle = NULL;
813 /* this is lame, but we cant wait on either 811 /* this is lame, but we can't wait on either
814 * the mle or lockres waitqueue here */ 812 * the mle or lockres waitqueue here */
815 if (mig) 813 if (mig)
816 msleep(100); 814 msleep(100);
@@ -845,7 +843,7 @@ lookup:
845 843
846 /* finally add the lockres to its hash bucket */ 844 /* finally add the lockres to its hash bucket */
847 __dlm_insert_lockres(dlm, res); 845 __dlm_insert_lockres(dlm, res);
 848 /* since this lockres is new it doesnt not require the spinlock */ 846 /* since this lockres is new it doesn't require the spinlock */
849 dlm_lockres_grab_inflight_ref_new(dlm, res); 847 dlm_lockres_grab_inflight_ref_new(dlm, res);
850 848
851 /* if this node does not become the master make sure to drop 849 /* if this node does not become the master make sure to drop
@@ -3120,8 +3118,6 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
3120 3118
3121 *oldmle = NULL; 3119 *oldmle = NULL;
3122 3120
3123 mlog_entry_void();
3124
3125 assert_spin_locked(&dlm->spinlock); 3121 assert_spin_locked(&dlm->spinlock);
3126 assert_spin_locked(&dlm->master_lock); 3122 assert_spin_locked(&dlm->master_lock);
3127 3123
@@ -3261,7 +3257,7 @@ void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node)
3261 struct hlist_node *list; 3257 struct hlist_node *list;
3262 unsigned int i; 3258 unsigned int i;
3263 3259
3264 mlog_entry("dlm=%s, dead node=%u\n", dlm->name, dead_node); 3260 mlog(0, "dlm=%s, dead node=%u\n", dlm->name, dead_node);
3265top: 3261top:
3266 assert_spin_locked(&dlm->spinlock); 3262 assert_spin_locked(&dlm->spinlock);
3267 3263
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index aaaffbcbe916..f1beb6fc254d 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -727,7 +727,6 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
727 if (destroy) 727 if (destroy)
728 dlm_destroy_recovery_area(dlm, dead_node); 728 dlm_destroy_recovery_area(dlm, dead_node);
729 729
730 mlog_exit(status);
731 return status; 730 return status;
732} 731}
733 732
@@ -1496,9 +1495,9 @@ leave:
1496 kfree(buf); 1495 kfree(buf);
1497 if (item) 1496 if (item)
1498 kfree(item); 1497 kfree(item);
1498 mlog_errno(ret);
1499 } 1499 }
1500 1500
1501 mlog_exit(ret);
1502 return ret; 1501 return ret;
1503} 1502}
1504 1503
@@ -1567,7 +1566,6 @@ leave:
1567 dlm_lockres_put(res); 1566 dlm_lockres_put(res);
1568 } 1567 }
1569 kfree(data); 1568 kfree(data);
1570 mlog_exit(ret);
1571} 1569}
1572 1570
1573 1571
@@ -1986,7 +1984,6 @@ leave:
1986 dlm_lock_put(newlock); 1984 dlm_lock_put(newlock);
1987 } 1985 }
1988 1986
1989 mlog_exit(ret);
1990 return ret; 1987 return ret;
1991} 1988}
1992 1989
@@ -2083,8 +2080,6 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
2083 struct hlist_head *bucket; 2080 struct hlist_head *bucket;
2084 struct dlm_lock_resource *res, *next; 2081 struct dlm_lock_resource *res, *next;
2085 2082
2086 mlog_entry_void();
2087
2088 assert_spin_locked(&dlm->spinlock); 2083 assert_spin_locked(&dlm->spinlock);
2089 2084
2090 list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) { 2085 list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) {
@@ -2607,8 +2602,6 @@ static int dlm_send_begin_reco_message(struct dlm_ctxt *dlm, u8 dead_node)
2607 int nodenum; 2602 int nodenum;
2608 int status; 2603 int status;
2609 2604
2610 mlog_entry("%u\n", dead_node);
2611
2612 mlog(0, "%s: dead node is %u\n", dlm->name, dead_node); 2605 mlog(0, "%s: dead node is %u\n", dlm->name, dead_node);
2613 2606
2614 spin_lock(&dlm->spinlock); 2607 spin_lock(&dlm->spinlock);
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 817287c6a6db..850aa7e87537 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -317,7 +317,7 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
317 struct kvec vec[2]; 317 struct kvec vec[2];
318 size_t veclen = 1; 318 size_t veclen = 1;
319 319
320 mlog_entry("%.*s\n", res->lockname.len, res->lockname.name); 320 mlog(0, "%.*s\n", res->lockname.len, res->lockname.name);
321 321
322 if (owner == dlm->node_num) { 322 if (owner == dlm->node_num) {
323 /* ended up trying to contact ourself. this means 323 /* ended up trying to contact ourself. this means
@@ -588,8 +588,6 @@ enum dlm_status dlmunlock(struct dlm_ctxt *dlm, struct dlm_lockstatus *lksb,
588 struct dlm_lock *lock = NULL; 588 struct dlm_lock *lock = NULL;
589 int call_ast, is_master; 589 int call_ast, is_master;
590 590
591 mlog_entry_void();
592
593 if (!lksb) { 591 if (!lksb) {
594 dlm_error(DLM_BADARGS); 592 dlm_error(DLM_BADARGS);
595 return DLM_BADARGS; 593 return DLM_BADARGS;
diff --git a/fs/ocfs2/dlmfs/Makefile b/fs/ocfs2/dlmfs/Makefile
index df69b4856d0d..f14be89a6701 100644
--- a/fs/ocfs2/dlmfs/Makefile
+++ b/fs/ocfs2/dlmfs/Makefile
@@ -1,4 +1,4 @@
1EXTRA_CFLAGS += -Ifs/ocfs2 1ccflags-y := -Ifs/ocfs2
2 2
3obj-$(CONFIG_OCFS2_FS) += ocfs2_dlmfs.o 3obj-$(CONFIG_OCFS2_FS) += ocfs2_dlmfs.o
4 4
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index e8d94d722ecb..7642d7ca73e5 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -64,7 +64,7 @@ struct ocfs2_mask_waiter {
64 unsigned long mw_mask; 64 unsigned long mw_mask;
65 unsigned long mw_goal; 65 unsigned long mw_goal;
66#ifdef CONFIG_OCFS2_FS_STATS 66#ifdef CONFIG_OCFS2_FS_STATS
67 unsigned long long mw_lock_start; 67 ktime_t mw_lock_start;
68#endif 68#endif
69}; 69};
70 70
@@ -397,8 +397,6 @@ static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
397{ 397{
398 int len; 398 int len;
399 399
400 mlog_entry_void();
401
402 BUG_ON(type >= OCFS2_NUM_LOCK_TYPES); 400 BUG_ON(type >= OCFS2_NUM_LOCK_TYPES);
403 401
404 len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x", 402 len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x",
@@ -408,8 +406,6 @@ static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
408 BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1)); 406 BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1));
409 407
410 mlog(0, "built lock resource with name: %s\n", name); 408 mlog(0, "built lock resource with name: %s\n", name);
411
412 mlog_exit_void();
413} 409}
414 410
415static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock); 411static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock);
@@ -435,44 +431,41 @@ static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res)
435#ifdef CONFIG_OCFS2_FS_STATS 431#ifdef CONFIG_OCFS2_FS_STATS
436static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) 432static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
437{ 433{
438 res->l_lock_num_prmode = 0;
439 res->l_lock_num_prmode_failed = 0;
440 res->l_lock_total_prmode = 0;
441 res->l_lock_max_prmode = 0;
442 res->l_lock_num_exmode = 0;
443 res->l_lock_num_exmode_failed = 0;
444 res->l_lock_total_exmode = 0;
445 res->l_lock_max_exmode = 0;
446 res->l_lock_refresh = 0; 434 res->l_lock_refresh = 0;
435 memset(&res->l_lock_prmode, 0, sizeof(struct ocfs2_lock_stats));
436 memset(&res->l_lock_exmode, 0, sizeof(struct ocfs2_lock_stats));
447} 437}
448 438
449static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level, 439static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level,
450 struct ocfs2_mask_waiter *mw, int ret) 440 struct ocfs2_mask_waiter *mw, int ret)
451{ 441{
452 unsigned long long *num, *sum; 442 u32 usec;
453 unsigned int *max, *failed; 443 ktime_t kt;
454 struct timespec ts = current_kernel_time(); 444 struct ocfs2_lock_stats *stats;
455 unsigned long long time = timespec_to_ns(&ts) - mw->mw_lock_start; 445
456 446 if (level == LKM_PRMODE)
457 if (level == LKM_PRMODE) { 447 stats = &res->l_lock_prmode;
458 num = &res->l_lock_num_prmode; 448 else if (level == LKM_EXMODE)
459 sum = &res->l_lock_total_prmode; 449 stats = &res->l_lock_exmode;
460 max = &res->l_lock_max_prmode; 450 else
461 failed = &res->l_lock_num_prmode_failed;
462 } else if (level == LKM_EXMODE) {
463 num = &res->l_lock_num_exmode;
464 sum = &res->l_lock_total_exmode;
465 max = &res->l_lock_max_exmode;
466 failed = &res->l_lock_num_exmode_failed;
467 } else
468 return; 451 return;
469 452
470 (*num)++; 453 kt = ktime_sub(ktime_get(), mw->mw_lock_start);
471 (*sum) += time; 454 usec = ktime_to_us(kt);
472 if (time > *max) 455
473 *max = time; 456 stats->ls_gets++;
457 stats->ls_total += ktime_to_ns(kt);
458 /* overflow */
 459 if (unlikely(stats->ls_gets == 0)) {
460 stats->ls_gets++;
461 stats->ls_total = ktime_to_ns(kt);
462 }
463
464 if (stats->ls_max < usec)
465 stats->ls_max = usec;
466
474 if (ret) 467 if (ret)
475 (*failed)++; 468 stats->ls_fail++;
476} 469}
477 470
478static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) 471static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
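The lock statistics above collapse eight per-mode fields into two instances of a small structure and move from current_kernel_time() to the monotonic ktime API, keeping the running total in nanoseconds but the maximum in microseconds. The structure itself is defined outside this diff (in ocfs2.h); presumably it is along these lines:

struct ocfs2_lock_stats {
	u64	ls_total;	/* Total wait in nsec */
	u32	ls_gets;	/* Num acquires */
	u32	ls_fail;	/* Num failed acquires */
	u32	ls_max;		/* Max wait in usec */
};

ktime_get() reads the monotonic clock, so the measured wait cannot jump backwards or forwards when wall time is stepped, a guarantee current_kernel_time() could not provide.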
@@ -482,8 +475,7 @@ static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
482 475
483static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) 476static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
484{ 477{
485 struct timespec ts = current_kernel_time(); 478 mw->mw_lock_start = ktime_get();
486 mw->mw_lock_start = timespec_to_ns(&ts);
487} 479}
488#else 480#else
489static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) 481static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
@@ -729,8 +721,6 @@ void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres,
729 721
730void ocfs2_lock_res_free(struct ocfs2_lock_res *res) 722void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
731{ 723{
732 mlog_entry_void();
733
734 if (!(res->l_flags & OCFS2_LOCK_INITIALIZED)) 724 if (!(res->l_flags & OCFS2_LOCK_INITIALIZED))
735 return; 725 return;
736 726
@@ -756,14 +746,11 @@ void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
756 memset(&res->l_lksb, 0, sizeof(res->l_lksb)); 746 memset(&res->l_lksb, 0, sizeof(res->l_lksb));
757 747
758 res->l_flags = 0UL; 748 res->l_flags = 0UL;
759 mlog_exit_void();
760} 749}
761 750
762static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, 751static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres,
763 int level) 752 int level)
764{ 753{
765 mlog_entry_void();
766
767 BUG_ON(!lockres); 754 BUG_ON(!lockres);
768 755
769 switch(level) { 756 switch(level) {
@@ -776,15 +763,11 @@ static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres,
776 default: 763 default:
777 BUG(); 764 BUG();
778 } 765 }
779
780 mlog_exit_void();
781} 766}
782 767
783static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres, 768static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres,
784 int level) 769 int level)
785{ 770{
786 mlog_entry_void();
787
788 BUG_ON(!lockres); 771 BUG_ON(!lockres);
789 772
790 switch(level) { 773 switch(level) {
@@ -799,7 +782,6 @@ static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres,
799 default: 782 default:
800 BUG(); 783 BUG();
801 } 784 }
802 mlog_exit_void();
803} 785}
804 786
805/* WARNING: This function lives in a world where the only three lock 787/* WARNING: This function lives in a world where the only three lock
@@ -846,8 +828,6 @@ static void lockres_clear_flags(struct ocfs2_lock_res *lockres,
846 828
847static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres) 829static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres)
848{ 830{
849 mlog_entry_void();
850
851 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 831 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
852 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 832 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
853 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 833 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
@@ -860,14 +840,10 @@ static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res
860 lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); 840 lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
861 } 841 }
862 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 842 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
863
864 mlog_exit_void();
865} 843}
866 844
867static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres) 845static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres)
868{ 846{
869 mlog_entry_void();
870
871 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 847 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
872 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 848 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
873 849
@@ -889,14 +865,10 @@ static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lo
889 lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); 865 lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
890 866
891 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 867 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
892
893 mlog_exit_void();
894} 868}
895 869
896static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres) 870static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres)
897{ 871{
898 mlog_entry_void();
899
900 BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY))); 872 BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY)));
901 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 873 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
902 874
@@ -908,15 +880,12 @@ static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *loc
908 lockres->l_level = lockres->l_requested; 880 lockres->l_level = lockres->l_requested;
909 lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED); 881 lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED);
910 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 882 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
911
912 mlog_exit_void();
913} 883}
914 884
915static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, 885static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
916 int level) 886 int level)
917{ 887{
918 int needs_downconvert = 0; 888 int needs_downconvert = 0;
919 mlog_entry_void();
920 889
921 assert_spin_locked(&lockres->l_lock); 890 assert_spin_locked(&lockres->l_lock);
922 891
@@ -938,8 +907,7 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
938 907
939 if (needs_downconvert) 908 if (needs_downconvert)
940 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 909 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
941 910 mlog(0, "needs_downconvert = %d\n", needs_downconvert);
942 mlog_exit(needs_downconvert);
943 return needs_downconvert; 911 return needs_downconvert;
944} 912}
945 913
@@ -1151,8 +1119,6 @@ static void ocfs2_unlock_ast(struct ocfs2_dlm_lksb *lksb, int error)
1151 struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb); 1119 struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
1152 unsigned long flags; 1120 unsigned long flags;
1153 1121
1154 mlog_entry_void();
1155
1156 mlog(ML_BASTS, "UNLOCK AST fired for lockres %s, action = %d\n", 1122 mlog(ML_BASTS, "UNLOCK AST fired for lockres %s, action = %d\n",
1157 lockres->l_name, lockres->l_unlock_action); 1123 lockres->l_name, lockres->l_unlock_action);
1158 1124
@@ -1162,7 +1128,6 @@ static void ocfs2_unlock_ast(struct ocfs2_dlm_lksb *lksb, int error)
1162 "unlock_action %d\n", error, lockres->l_name, 1128 "unlock_action %d\n", error, lockres->l_name,
1163 lockres->l_unlock_action); 1129 lockres->l_unlock_action);
1164 spin_unlock_irqrestore(&lockres->l_lock, flags); 1130 spin_unlock_irqrestore(&lockres->l_lock, flags);
1165 mlog_exit_void();
1166 return; 1131 return;
1167 } 1132 }
1168 1133
@@ -1186,8 +1151,6 @@ static void ocfs2_unlock_ast(struct ocfs2_dlm_lksb *lksb, int error)
1186 lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 1151 lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
1187 wake_up(&lockres->l_event); 1152 wake_up(&lockres->l_event);
1188 spin_unlock_irqrestore(&lockres->l_lock, flags); 1153 spin_unlock_irqrestore(&lockres->l_lock, flags);
1189
1190 mlog_exit_void();
1191} 1154}
1192 1155
1193/* 1156/*
@@ -1233,7 +1196,6 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
1233{ 1196{
1234 unsigned long flags; 1197 unsigned long flags;
1235 1198
1236 mlog_entry_void();
1237 spin_lock_irqsave(&lockres->l_lock, flags); 1199 spin_lock_irqsave(&lockres->l_lock, flags);
1238 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 1200 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
1239 lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); 1201 lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
@@ -1244,7 +1206,6 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
1244 spin_unlock_irqrestore(&lockres->l_lock, flags); 1206 spin_unlock_irqrestore(&lockres->l_lock, flags);
1245 1207
1246 wake_up(&lockres->l_event); 1208 wake_up(&lockres->l_event);
1247 mlog_exit_void();
1248} 1209}
1249 1210
1250/* Note: If we detect another process working on the lock (i.e., 1211/* Note: If we detect another process working on the lock (i.e.,
@@ -1260,8 +1221,6 @@ static int ocfs2_lock_create(struct ocfs2_super *osb,
1260 unsigned long flags; 1221 unsigned long flags;
1261 unsigned int gen; 1222 unsigned int gen;
1262 1223
1263 mlog_entry_void();
1264
1265 mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level, 1224 mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level,
1266 dlm_flags); 1225 dlm_flags);
1267 1226
@@ -1293,7 +1252,6 @@ static int ocfs2_lock_create(struct ocfs2_super *osb,
1293 mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name); 1252 mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name);
1294 1253
1295bail: 1254bail:
1296 mlog_exit(ret);
1297 return ret; 1255 return ret;
1298} 1256}
1299 1257
@@ -1416,8 +1374,6 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
1416 unsigned int gen; 1374 unsigned int gen;
1417 int noqueue_attempted = 0; 1375 int noqueue_attempted = 0;
1418 1376
1419 mlog_entry_void();
1420
1421 ocfs2_init_mask_waiter(&mw); 1377 ocfs2_init_mask_waiter(&mw);
1422 1378
1423 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 1379 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
@@ -1583,7 +1539,6 @@ out:
1583 caller_ip); 1539 caller_ip);
1584 } 1540 }
1585#endif 1541#endif
1586 mlog_exit(ret);
1587 return ret; 1542 return ret;
1588} 1543}
1589 1544
@@ -1605,7 +1560,6 @@ static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
1605{ 1560{
1606 unsigned long flags; 1561 unsigned long flags;
1607 1562
1608 mlog_entry_void();
1609 spin_lock_irqsave(&lockres->l_lock, flags); 1563 spin_lock_irqsave(&lockres->l_lock, flags);
1610 ocfs2_dec_holders(lockres, level); 1564 ocfs2_dec_holders(lockres, level);
1611 ocfs2_downconvert_on_unlock(osb, lockres); 1565 ocfs2_downconvert_on_unlock(osb, lockres);
@@ -1614,7 +1568,6 @@ static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
1614 if (lockres->l_lockdep_map.key != NULL) 1568 if (lockres->l_lockdep_map.key != NULL)
1615 rwsem_release(&lockres->l_lockdep_map, 1, caller_ip); 1569 rwsem_release(&lockres->l_lockdep_map, 1, caller_ip);
1616#endif 1570#endif
1617 mlog_exit_void();
1618} 1571}
1619 1572
1620static int ocfs2_create_new_lock(struct ocfs2_super *osb, 1573static int ocfs2_create_new_lock(struct ocfs2_super *osb,
@@ -1648,8 +1601,6 @@ int ocfs2_create_new_inode_locks(struct inode *inode)
1648 BUG_ON(!inode); 1601 BUG_ON(!inode);
1649 BUG_ON(!ocfs2_inode_is_new(inode)); 1602 BUG_ON(!ocfs2_inode_is_new(inode));
1650 1603
1651 mlog_entry_void();
1652
1653 mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); 1604 mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno);
1654 1605
1655 /* NOTE: That we don't increment any of the holder counts, nor 1606 /* NOTE: That we don't increment any of the holder counts, nor
@@ -1683,7 +1634,6 @@ int ocfs2_create_new_inode_locks(struct inode *inode)
1683 } 1634 }
1684 1635
1685bail: 1636bail:
1686 mlog_exit(ret);
1687 return ret; 1637 return ret;
1688} 1638}
1689 1639
@@ -1695,16 +1645,12 @@ int ocfs2_rw_lock(struct inode *inode, int write)
1695 1645
1696 BUG_ON(!inode); 1646 BUG_ON(!inode);
1697 1647
1698 mlog_entry_void();
1699
1700 mlog(0, "inode %llu take %s RW lock\n", 1648 mlog(0, "inode %llu take %s RW lock\n",
1701 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1649 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1702 write ? "EXMODE" : "PRMODE"); 1650 write ? "EXMODE" : "PRMODE");
1703 1651
1704 if (ocfs2_mount_local(osb)) { 1652 if (ocfs2_mount_local(osb))
1705 mlog_exit(0);
1706 return 0; 1653 return 0;
1707 }
1708 1654
1709 lockres = &OCFS2_I(inode)->ip_rw_lockres; 1655 lockres = &OCFS2_I(inode)->ip_rw_lockres;
1710 1656
@@ -1715,7 +1661,6 @@ int ocfs2_rw_lock(struct inode *inode, int write)
1715 if (status < 0) 1661 if (status < 0)
1716 mlog_errno(status); 1662 mlog_errno(status);
1717 1663
1718 mlog_exit(status);
1719 return status; 1664 return status;
1720} 1665}
1721 1666
@@ -1725,16 +1670,12 @@ void ocfs2_rw_unlock(struct inode *inode, int write)
1725 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; 1670 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres;
1726 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1671 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1727 1672
1728 mlog_entry_void();
1729
1730 mlog(0, "inode %llu drop %s RW lock\n", 1673 mlog(0, "inode %llu drop %s RW lock\n",
1731 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1674 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1732 write ? "EXMODE" : "PRMODE"); 1675 write ? "EXMODE" : "PRMODE");
1733 1676
1734 if (!ocfs2_mount_local(osb)) 1677 if (!ocfs2_mount_local(osb))
1735 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 1678 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
1736
1737 mlog_exit_void();
1738} 1679}
1739 1680
1740/* 1681/*
@@ -1748,8 +1689,6 @@ int ocfs2_open_lock(struct inode *inode)
1748 1689
1749 BUG_ON(!inode); 1690 BUG_ON(!inode);
1750 1691
1751 mlog_entry_void();
1752
1753 mlog(0, "inode %llu take PRMODE open lock\n", 1692 mlog(0, "inode %llu take PRMODE open lock\n",
1754 (unsigned long long)OCFS2_I(inode)->ip_blkno); 1693 (unsigned long long)OCFS2_I(inode)->ip_blkno);
1755 1694
@@ -1764,7 +1703,6 @@ int ocfs2_open_lock(struct inode *inode)
1764 mlog_errno(status); 1703 mlog_errno(status);
1765 1704
1766out: 1705out:
1767 mlog_exit(status);
1768 return status; 1706 return status;
1769} 1707}
1770 1708
@@ -1776,8 +1714,6 @@ int ocfs2_try_open_lock(struct inode *inode, int write)
1776 1714
1777 BUG_ON(!inode); 1715 BUG_ON(!inode);
1778 1716
1779 mlog_entry_void();
1780
1781 mlog(0, "inode %llu try to take %s open lock\n", 1717 mlog(0, "inode %llu try to take %s open lock\n",
1782 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1718 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1783 write ? "EXMODE" : "PRMODE"); 1719 write ? "EXMODE" : "PRMODE");
@@ -1799,7 +1735,6 @@ int ocfs2_try_open_lock(struct inode *inode, int write)
1799 level, DLM_LKF_NOQUEUE, 0); 1735 level, DLM_LKF_NOQUEUE, 0);
1800 1736
1801out: 1737out:
1802 mlog_exit(status);
1803 return status; 1738 return status;
1804} 1739}
1805 1740
@@ -1811,8 +1746,6 @@ void ocfs2_open_unlock(struct inode *inode)
1811 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres; 1746 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres;
1812 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1747 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1813 1748
1814 mlog_entry_void();
1815
1816 mlog(0, "inode %llu drop open lock\n", 1749 mlog(0, "inode %llu drop open lock\n",
1817 (unsigned long long)OCFS2_I(inode)->ip_blkno); 1750 (unsigned long long)OCFS2_I(inode)->ip_blkno);
1818 1751
@@ -1827,7 +1760,7 @@ void ocfs2_open_unlock(struct inode *inode)
1827 DLM_LOCK_EX); 1760 DLM_LOCK_EX);
1828 1761
1829out: 1762out:
1830 mlog_exit_void(); 1763 return;
1831} 1764}
1832 1765
1833static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres, 1766static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres,
@@ -2043,8 +1976,6 @@ static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
2043{ 1976{
2044 int kick = 0; 1977 int kick = 0;
2045 1978
2046 mlog_entry_void();
2047
2048 /* If we know that another node is waiting on our lock, kick 1979 /* If we know that another node is waiting on our lock, kick
2049 * the downconvert thread pre-emptively when we reach a release 1980 * the downconvert thread pre-emptively when we reach a release
2050 * condition. */ 1981 * condition. */
@@ -2065,8 +1996,6 @@ static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
2065 1996
2066 if (kick) 1997 if (kick)
2067 ocfs2_wake_downconvert_thread(osb); 1998 ocfs2_wake_downconvert_thread(osb);
2068
2069 mlog_exit_void();
2070} 1999}
2071 2000
2072#define OCFS2_SEC_BITS 34 2001#define OCFS2_SEC_BITS 34
@@ -2095,8 +2024,6 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
2095 struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 2024 struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
2096 struct ocfs2_meta_lvb *lvb; 2025 struct ocfs2_meta_lvb *lvb;
2097 2026
2098 mlog_entry_void();
2099
2100 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2027 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2101 2028
2102 /* 2029 /*
@@ -2128,8 +2055,6 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
2128 2055
2129out: 2056out:
2130 mlog_meta_lvb(0, lockres); 2057 mlog_meta_lvb(0, lockres);
2131
2132 mlog_exit_void();
2133} 2058}
2134 2059
2135static void ocfs2_unpack_timespec(struct timespec *spec, 2060static void ocfs2_unpack_timespec(struct timespec *spec,
@@ -2145,8 +2070,6 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
2145 struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 2070 struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
2146 struct ocfs2_meta_lvb *lvb; 2071 struct ocfs2_meta_lvb *lvb;
2147 2072
2148 mlog_entry_void();
2149
2150 mlog_meta_lvb(0, lockres); 2073 mlog_meta_lvb(0, lockres);
2151 2074
2152 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2075 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
@@ -2177,8 +2100,6 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
2177 ocfs2_unpack_timespec(&inode->i_ctime, 2100 ocfs2_unpack_timespec(&inode->i_ctime,
2178 be64_to_cpu(lvb->lvb_ictime_packed)); 2101 be64_to_cpu(lvb->lvb_ictime_packed));
2179 spin_unlock(&oi->ip_lock); 2102 spin_unlock(&oi->ip_lock);
2180
2181 mlog_exit_void();
2182} 2103}
2183 2104
2184static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, 2105static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
@@ -2205,8 +2126,6 @@ static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres)
2205 unsigned long flags; 2126 unsigned long flags;
2206 int status = 0; 2127 int status = 0;
2207 2128
2208 mlog_entry_void();
2209
2210refresh_check: 2129refresh_check:
2211 spin_lock_irqsave(&lockres->l_lock, flags); 2130 spin_lock_irqsave(&lockres->l_lock, flags);
2212 if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) { 2131 if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) {
@@ -2227,7 +2146,7 @@ refresh_check:
2227 2146
2228 status = 1; 2147 status = 1;
2229bail: 2148bail:
2230 mlog_exit(status); 2149 mlog(0, "status %d\n", status);
2231 return status; 2150 return status;
2232} 2151}
2233 2152
@@ -2237,7 +2156,6 @@ static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockre
2237 int status) 2156 int status)
2238{ 2157{
2239 unsigned long flags; 2158 unsigned long flags;
2240 mlog_entry_void();
2241 2159
2242 spin_lock_irqsave(&lockres->l_lock, flags); 2160 spin_lock_irqsave(&lockres->l_lock, flags);
2243 lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING); 2161 lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING);
@@ -2246,8 +2164,6 @@ static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockre
2246 spin_unlock_irqrestore(&lockres->l_lock, flags); 2164 spin_unlock_irqrestore(&lockres->l_lock, flags);
2247 2165
2248 wake_up(&lockres->l_event); 2166 wake_up(&lockres->l_event);
2249
2250 mlog_exit_void();
2251} 2167}
2252 2168
2253/* may or may not return a bh if it went to disk. */ 2169/* may or may not return a bh if it went to disk. */
@@ -2260,8 +2176,6 @@ static int ocfs2_inode_lock_update(struct inode *inode,
2260 struct ocfs2_dinode *fe; 2176 struct ocfs2_dinode *fe;
2261 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2177 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2262 2178
2263 mlog_entry_void();
2264
 	if (ocfs2_mount_local(osb))
 		goto bail;
 
@@ -2330,7 +2244,6 @@ static int ocfs2_inode_lock_update(struct inode *inode,
 bail_refresh:
 	ocfs2_complete_lock_res_refresh(lockres, status);
 bail:
-	mlog_exit(status);
 	return status;
 }
 
@@ -2374,8 +2287,6 @@ int ocfs2_inode_lock_full_nested(struct inode *inode,
 
 	BUG_ON(!inode);
 
-	mlog_entry_void();
-
 	mlog(0, "inode %llu, take %s META lock\n",
 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 	     ex ? "EXMODE" : "PRMODE");
@@ -2467,7 +2378,6 @@ bail:
 	if (local_bh)
 		brelse(local_bh);
 
-	mlog_exit(status);
 	return status;
 }
 
@@ -2517,7 +2427,6 @@ int ocfs2_inode_lock_atime(struct inode *inode,
 {
 	int ret;
 
-	mlog_entry_void();
 	ret = ocfs2_inode_lock(inode, NULL, 0);
 	if (ret < 0) {
 		mlog_errno(ret);
@@ -2545,7 +2454,6 @@ int ocfs2_inode_lock_atime(struct inode *inode,
 	} else
 		*level = 0;
 
-	mlog_exit(ret);
 	return ret;
 }
 
@@ -2556,8 +2464,6 @@ void ocfs2_inode_unlock(struct inode *inode,
 	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 
-	mlog_entry_void();
-
 	mlog(0, "inode %llu drop %s META lock\n",
 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 	     ex ? "EXMODE" : "PRMODE");
@@ -2565,8 +2471,6 @@ void ocfs2_inode_unlock(struct inode *inode,
 	if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) &&
 	    !ocfs2_mount_local(osb))
 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
-
-	mlog_exit_void();
 }
 
 int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno)
@@ -2617,8 +2521,6 @@ int ocfs2_super_lock(struct ocfs2_super *osb,
 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
 	struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
 
-	mlog_entry_void();
-
 	if (ocfs2_is_hard_readonly(osb))
 		return -EROFS;
 
@@ -2650,7 +2552,6 @@ int ocfs2_super_lock(struct ocfs2_super *osb,
 		ocfs2_track_lock_refresh(lockres);
 	}
 bail:
-	mlog_exit(status);
 	return status;
 }
 
@@ -2869,8 +2770,15 @@ static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos)
 	return iter;
 }
 
-/* So that debugfs.ocfs2 can determine which format is being used */
-#define OCFS2_DLM_DEBUG_STR_VERSION 2
+/*
+ * Version is used by debugfs.ocfs2 to determine the format being used
+ *
+ * New in version 2
+ * - Lock stats printed
+ * New in version 3
+ * - Max time in lock stats is in usecs (instead of nsecs)
+ */
+#define OCFS2_DLM_DEBUG_STR_VERSION 3
 static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
 {
 	int i;
@@ -2912,18 +2820,18 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
 		seq_printf(m, "0x%x\t", lvb[i]);
 
 #ifdef CONFIG_OCFS2_FS_STATS
-# define lock_num_prmode(_l)		(_l)->l_lock_num_prmode
-# define lock_num_exmode(_l)		(_l)->l_lock_num_exmode
-# define lock_num_prmode_failed(_l)	(_l)->l_lock_num_prmode_failed
-# define lock_num_exmode_failed(_l)	(_l)->l_lock_num_exmode_failed
-# define lock_total_prmode(_l)		(_l)->l_lock_total_prmode
-# define lock_total_exmode(_l)		(_l)->l_lock_total_exmode
-# define lock_max_prmode(_l)		(_l)->l_lock_max_prmode
-# define lock_max_exmode(_l)		(_l)->l_lock_max_exmode
-# define lock_refresh(_l)		(_l)->l_lock_refresh
+# define lock_num_prmode(_l)		((_l)->l_lock_prmode.ls_gets)
+# define lock_num_exmode(_l)		((_l)->l_lock_exmode.ls_gets)
+# define lock_num_prmode_failed(_l)	((_l)->l_lock_prmode.ls_fail)
+# define lock_num_exmode_failed(_l)	((_l)->l_lock_exmode.ls_fail)
+# define lock_total_prmode(_l)		((_l)->l_lock_prmode.ls_total)
+# define lock_total_exmode(_l)		((_l)->l_lock_exmode.ls_total)
+# define lock_max_prmode(_l)		((_l)->l_lock_prmode.ls_max)
+# define lock_max_exmode(_l)		((_l)->l_lock_exmode.ls_max)
+# define lock_refresh(_l)		((_l)->l_lock_refresh)
 #else
-# define lock_num_prmode(_l)		(0ULL)
-# define lock_num_exmode(_l)		(0ULL)
+# define lock_num_prmode(_l)		(0)
+# define lock_num_exmode(_l)		(0)
 # define lock_num_prmode_failed(_l)	(0)
 # define lock_num_exmode_failed(_l)	(0)
 # define lock_total_prmode(_l)		(0ULL)
@@ -2933,8 +2841,8 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
 # define lock_refresh(_l)		(0)
 #endif
 	/* The following seq_print was added in version 2 of this output */
-	seq_printf(m, "%llu\t"
-		   "%llu\t"
+	seq_printf(m, "%u\t"
+		   "%u\t"
 		   "%u\t"
 		   "%u\t"
 		   "%llu\t"
@@ -3054,8 +2962,6 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
 	int status = 0;
 	struct ocfs2_cluster_connection *conn = NULL;
 
-	mlog_entry_void();
-
 	if (ocfs2_mount_local(osb)) {
 		osb->node_num = 0;
 		goto local;
@@ -3112,15 +3018,12 @@ bail:
 		kthread_stop(osb->dc_task);
 	}
 
-	mlog_exit(status);
 	return status;
 }
 
 void ocfs2_dlm_shutdown(struct ocfs2_super *osb,
 			int hangup_pending)
 {
-	mlog_entry_void();
-
 	ocfs2_drop_osb_locks(osb);
 
 	/*
@@ -3143,8 +3046,6 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb,
 	osb->cconn = NULL;
 
 	ocfs2_dlm_shutdown_debug(osb);
-
-	mlog_exit_void();
 }
 
 static int ocfs2_drop_lock(struct ocfs2_super *osb,
@@ -3226,7 +3127,6 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb,
 
 	ocfs2_wait_on_busy_lock(lockres);
 out:
-	mlog_exit(0);
 	return 0;
 }
 
@@ -3284,8 +3184,6 @@ int ocfs2_drop_inode_locks(struct inode *inode)
 {
 	int status, err;
 
-	mlog_entry_void();
-
 	/* No need to call ocfs2_mark_lockres_freeing here -
 	 * ocfs2_clear_inode has done it for us. */
 
@@ -3310,7 +3208,6 @@ int ocfs2_drop_inode_locks(struct inode *inode)
 	if (err < 0 && !status)
 		status = err;
 
-	mlog_exit(status);
 	return status;
 }
 
@@ -3352,8 +3249,6 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
 	int ret;
 	u32 dlm_flags = DLM_LKF_CONVERT;
 
-	mlog_entry_void();
-
 	mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name,
 	     lockres->l_level, new_level);
 
@@ -3375,7 +3270,6 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
 
 	ret = 0;
 bail:
-	mlog_exit(ret);
 	return ret;
 }
 
@@ -3385,8 +3279,6 @@ static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
 {
 	assert_spin_locked(&lockres->l_lock);
 
-	mlog_entry_void();
-
 	if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) {
 		/* If we're already trying to cancel a lock conversion
 		 * then just drop the spinlock and allow the caller to
@@ -3416,8 +3308,6 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb,
 {
 	int ret;
 
-	mlog_entry_void();
-
 	ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb,
 			       DLM_LKF_CANCEL);
 	if (ret) {
@@ -3427,7 +3317,6 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb,
 
 	mlog(ML_BASTS, "lockres %s\n", lockres->l_name);
 
-	mlog_exit(ret);
 	return ret;
 }
 
@@ -3443,8 +3332,6 @@ static int ocfs2_unblock_lock(struct ocfs2_super *osb,
 	int set_lvb = 0;
 	unsigned int gen;
 
-	mlog_entry_void();
-
 	spin_lock_irqsave(&lockres->l_lock, flags);
 
 recheck:
@@ -3619,14 +3506,14 @@ downconvert:
 				     gen);
 
 leave:
-	mlog_exit(ret);
+	if (ret)
+		mlog_errno(ret);
 	return ret;
 
 leave_requeue:
 	spin_unlock_irqrestore(&lockres->l_lock, flags);
 	ctl->requeue = 1;
 
-	mlog_exit(0);
 	return 0;
 }
 
@@ -3859,8 +3746,6 @@ static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres)
 	struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
 					    oinfo->dqi_gi.dqi_type);
 
-	mlog_entry_void();
-
 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
 	lvb->lvb_version = OCFS2_QINFO_LVB_VERSION;
 	lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace);
@@ -3869,8 +3754,6 @@ static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres)
 	lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks);
 	lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk);
 	lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry);
-
-	mlog_exit_void();
 }
 
 void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex)
@@ -3879,10 +3762,8 @@ void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex)
 	struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
 
-	mlog_entry_void();
 	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb))
 		ocfs2_cluster_unlock(osb, lockres, level);
-	mlog_exit_void();
 }
 
 static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
@@ -3937,8 +3818,6 @@ int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex)
 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
 	int status = 0;
 
-	mlog_entry_void();
-
 	/* On RO devices, locking really isn't needed... */
 	if (ocfs2_is_hard_readonly(osb)) {
 		if (ex)
@@ -3961,7 +3840,6 @@ int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex)
 		ocfs2_qinfo_unlock(oinfo, ex);
 	ocfs2_complete_lock_res_refresh(lockres, status);
 bail:
-	mlog_exit(status);
 	return status;
 }
 
@@ -4007,8 +3885,6 @@ static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
 	 * considered valid until we remove the OCFS2_LOCK_QUEUED
 	 * flag. */
 
-	mlog_entry_void();
-
 	BUG_ON(!lockres);
 	BUG_ON(!lockres->l_ops);
 
@@ -4042,15 +3918,11 @@ unqueue:
 	if (ctl.unblock_action != UNBLOCK_CONTINUE
 	    && lockres->l_ops->post_unlock)
 		lockres->l_ops->post_unlock(osb, lockres);
-
-	mlog_exit_void();
 }
 
 static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
 					struct ocfs2_lock_res *lockres)
 {
-	mlog_entry_void();
-
 	assert_spin_locked(&lockres->l_lock);
 
 	if (lockres->l_flags & OCFS2_LOCK_FREEING) {
@@ -4071,8 +3943,6 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
 		osb->blocked_lock_count++;
 	}
 	spin_unlock(&osb->dc_task_lock);
-
-	mlog_exit_void();
 }
 
 static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
@@ -4080,8 +3950,6 @@ static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
 	unsigned long processed;
 	struct ocfs2_lock_res *lockres;
 
-	mlog_entry_void();
-
 	spin_lock(&osb->dc_task_lock);
 	/* grab this early so we know to try again if a state change and
 	 * wake happens part-way through our work */
@@ -4105,8 +3973,6 @@ static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
 		spin_lock(&osb->dc_task_lock);
 	}
 	spin_unlock(&osb->dc_task_lock);
-
-	mlog_exit_void();
 }
 
 static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb)
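The lock-statistics hunks above replace eight flat l_lock_num_*/l_lock_total_*/l_lock_max_* counters with two per-mode buckets, l_lock_prmode and l_lock_exmode, read through ls_gets/ls_fail/ls_total/ls_max. The struct behind those fields lives in fs/ocfs2/ocfs2.h, which this diff does not show; the following is only a hedged reconstruction of what the accessor macros and the %u/%llu format changes imply, not the header's actual text:

/* Hypothetical sketch of the per-mode stats bucket implied by the
 * lock_num_*/lock_total_* accessors above; the real definition is in
 * fs/ocfs2/ocfs2.h and is not part of these hunks. */
struct ocfs2_lock_stats {
	u64 ls_total;	/* cumulative wait time (printed with "%llu") */
	u32 ls_gets;	/* acquisitions (printed with "%u" as of v3) */
	u32 ls_fail;	/* failed acquisitions */
	u32 ls_max;	/* longest single wait, usecs per the v3 note */
};

Grouping the counters this way is what allows the seq_show macros to collapse into one-line field accesses per mode instead of a distinct top-level counter per statistic.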
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 254652a9b542..745db42528d5 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -26,7 +26,6 @@
 #include <linux/fs.h>
 #include <linux/types.h>
 
-#define MLOG_MASK_PREFIX ML_EXPORT
 #include <cluster/masklog.h>
 
 #include "ocfs2.h"
@@ -40,6 +39,7 @@
 
 #include "buffer_head_io.h"
 #include "suballoc.h"
+#include "ocfs2_trace.h"
 
 struct ocfs2_inode_handle
 {
@@ -56,10 +56,9 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
 	int status, set;
 	struct dentry *result;
 
-	mlog_entry("(0x%p, 0x%p)\n", sb, handle);
+	trace_ocfs2_get_dentry_begin(sb, handle, (unsigned long long)blkno);
 
 	if (blkno == 0) {
-		mlog(0, "nfs wants inode with blkno: 0\n");
 		result = ERR_PTR(-ESTALE);
 		goto bail;
 	}
@@ -83,6 +82,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
 	}
 
 	status = ocfs2_test_inode_bit(osb, blkno, &set);
+	trace_ocfs2_get_dentry_test_bit(status, set);
 	if (status < 0) {
 		if (status == -EINVAL) {
 			/*
@@ -90,18 +90,14 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
 			 * as an inode, we return -ESTALE to be
 			 * nice
 			 */
-			mlog(0, "test inode bit failed %d\n", status);
 			status = -ESTALE;
-		} else {
+		} else
 			mlog(ML_ERROR, "test inode bit failed %d\n", status);
-		}
 		goto unlock_nfs_sync;
 	}
 
 	/* If the inode allocator bit is clear, this inode must be stale */
 	if (!set) {
-		mlog(0, "inode %llu suballoc bit is clear\n",
-		     (unsigned long long)blkno);
 		status = -ESTALE;
 		goto unlock_nfs_sync;
 	}
@@ -114,8 +110,8 @@ unlock_nfs_sync:
 check_err:
 	if (status < 0) {
 		if (status == -ESTALE) {
-			mlog(0, "stale inode ino: %llu generation: %u\n",
-			     (unsigned long long)blkno, handle->ih_generation);
+			trace_ocfs2_get_dentry_stale((unsigned long long)blkno,
+						     handle->ih_generation);
 		}
 		result = ERR_PTR(status);
 		goto bail;
@@ -130,8 +126,9 @@ check_err:
 check_gen:
 	if (handle->ih_generation != inode->i_generation) {
 		iput(inode);
-		mlog(0, "stale inode ino: %llu generation: %u\n",
-		     (unsigned long long)blkno, handle->ih_generation);
+		trace_ocfs2_get_dentry_generation((unsigned long long)blkno,
+						  handle->ih_generation,
+						  inode->i_generation);
 		result = ERR_PTR(-ESTALE);
 		goto bail;
 	}
@@ -141,7 +138,7 @@ check_gen:
 		mlog_errno(PTR_ERR(result));
 
 bail:
-	mlog_exit_ptr(result);
+	trace_ocfs2_get_dentry_end(result);
 	return result;
 }
 
@@ -152,11 +149,8 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
 	struct dentry *parent;
 	struct inode *dir = child->d_inode;
 
-	mlog_entry("(0x%p, '%.*s')\n", child,
-		   child->d_name.len, child->d_name.name);
-
-	mlog(0, "find parent of directory %llu\n",
-	     (unsigned long long)OCFS2_I(dir)->ip_blkno);
+	trace_ocfs2_get_parent(child, child->d_name.len, child->d_name.name,
+			       (unsigned long long)OCFS2_I(dir)->ip_blkno);
 
 	status = ocfs2_inode_lock(dir, NULL, 0);
 	if (status < 0) {
@@ -178,7 +172,7 @@ bail_unlock:
 	ocfs2_inode_unlock(dir, 0);
 
 bail:
-	mlog_exit_ptr(parent);
+	trace_ocfs2_get_parent_end(parent);
 
 	return parent;
 }
@@ -193,9 +187,9 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
 	u32 generation;
 	__le32 *fh = (__force __le32 *) fh_in;
 
-	mlog_entry("(0x%p, '%.*s', 0x%p, %d, %d)\n", dentry,
-		   dentry->d_name.len, dentry->d_name.name,
-		   fh, len, connectable);
+	trace_ocfs2_encode_fh_begin(dentry, dentry->d_name.len,
+				    dentry->d_name.name,
+				    fh, len, connectable);
 
 	if (connectable && (len < 6)) {
 		*max_len = 6;
@@ -210,8 +204,7 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
 	blkno = OCFS2_I(inode)->ip_blkno;
 	generation = inode->i_generation;
 
-	mlog(0, "Encoding fh: blkno: %llu, generation: %u\n",
-	     (unsigned long long)blkno, generation);
+	trace_ocfs2_encode_fh_self((unsigned long long)blkno, generation);
 
 	len = 3;
 	fh[0] = cpu_to_le32((u32)(blkno >> 32));
@@ -236,14 +229,14 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
 		len = 6;
 		type = 2;
 
-		mlog(0, "Encoding parent: blkno: %llu, generation: %u\n",
-		     (unsigned long long)blkno, generation);
+		trace_ocfs2_encode_fh_parent((unsigned long long)blkno,
					     generation);
 	}
 
 	*max_len = len;
 
 bail:
-	mlog_exit(type);
+	trace_ocfs2_encode_fh_type(type);
 	return type;
 }
 
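Each trace_ocfs2_*() call introduced above corresponds to a tracepoint declared in the new fs/ocfs2/ocfs2_trace.h, which these hunks include but never show. As a hedged illustration of the standard kernel pattern such a header uses, a two-argument event like trace_ocfs2_get_dentry_test_bit() could be declared roughly as follows; the field layout and format string here are assumptions, not the file's actual contents:

#undef TRACE_SYSTEM
#define TRACE_SYSTEM ocfs2

#include <linux/tracepoint.h>

/* Sketch only: the real ocfs2_trace.h is not part of these hunks. */
TRACE_EVENT(ocfs2_get_dentry_test_bit,
	TP_PROTO(int status, int set),
	TP_ARGS(status, set),
	TP_STRUCT__entry(
		__field(int, status)
		__field(int, set)
	),
	TP_fast_assign(
		__entry->status = status;
		__entry->set = set;
	),
	TP_printk("status %d set %d", __entry->status, __entry->set)
);

Declared this way, each event can be enabled individually at runtime through the tracing filesystem, whereas the mlog(0, ...) calls being removed were gated only by a per-file mask and compiled into every build.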
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 09e3fdfa6d33..23457b491e8c 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -28,7 +28,6 @@
 #include <linux/types.h>
 #include <linux/fiemap.h>
 
-#define MLOG_MASK_PREFIX ML_EXTENT_MAP
 #include <cluster/masklog.h>
 
 #include "ocfs2.h"
@@ -39,6 +38,7 @@
 #include "inode.h"
 #include "super.h"
 #include "symlink.h"
+#include "ocfs2_trace.h"
 
 #include "buffer_head_io.h"
 
@@ -841,10 +841,9 @@ int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
 	u64 p_block, p_count;
 	int i, count, done = 0;
 
-	mlog_entry("(inode = %p, v_block = %llu, nr = %d, bhs = %p, "
-		   "flags = %x, validate = %p)\n",
-		   inode, (unsigned long long)v_block, nr, bhs, flags,
-		   validate);
+	trace_ocfs2_read_virt_blocks(
+	     inode, (unsigned long long)v_block, nr, bhs, flags,
+	     validate);
 
 	if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >=
 	    i_size_read(inode)) {
@@ -897,7 +896,6 @@ int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
 	}
 
 out:
-	mlog_exit(rc);
 	return rc;
 }
 
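Many of the converted call sites pass the same shapes of arguments: a single block number, a pair of u64 byte offsets, a status integer. The tracepoint infrastructure lets one DECLARE_EVENT_CLASS() be shared by many DEFINE_EVENT() instances, which keeps a conversion of this size from duplicating hundreds of near-identical TRACE_EVENT bodies. A sketch of the idiom follows; the class and event names are illustrative, not taken from ocfs2_trace.h:

/* Sketch: one class for "events that log a single u64". */
DECLARE_EVENT_CLASS(ocfs2__ull,
	TP_PROTO(unsigned long long num),
	TP_ARGS(num),
	TP_STRUCT__entry(
		__field(unsigned long long, num)
	),
	TP_fast_assign(
		__entry->num = num;
	),
	TP_printk("%llu", __entry->num)
);

/* Each instance then costs two lines instead of a full event body. */
DEFINE_EVENT(ocfs2__ull, ocfs2_example_single_u64_event,
	TP_PROTO(unsigned long long num),
	TP_ARGS(num)
);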
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index a6651956482e..41565ae52856 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -38,7 +38,6 @@
 #include <linux/quotaops.h>
 #include <linux/blkdev.h>
 
-#define MLOG_MASK_PREFIX ML_INODE
 #include <cluster/masklog.h>
 
 #include "ocfs2.h"
@@ -61,6 +60,7 @@
 #include "acl.h"
 #include "quota.h"
 #include "refcounttree.h"
+#include "ocfs2_trace.h"
 
 #include "buffer_head_io.h"
 
@@ -99,8 +99,10 @@ static int ocfs2_file_open(struct inode *inode, struct file *file)
 	int mode = file->f_flags;
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 
-	mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file,
-		   file->f_path.dentry->d_name.len, file->f_path.dentry->d_name.name);
+	trace_ocfs2_file_open(inode, file, file->f_path.dentry,
+			      (unsigned long long)OCFS2_I(inode)->ip_blkno,
+			      file->f_path.dentry->d_name.len,
+			      file->f_path.dentry->d_name.name, mode);
 
 	if (file->f_mode & FMODE_WRITE)
 		dquot_initialize(inode);
@@ -135,7 +137,6 @@ static int ocfs2_file_open(struct inode *inode, struct file *file)
 	}
 
 leave:
-	mlog_exit(status);
 	return status;
 }
 
@@ -143,19 +144,19 @@ static int ocfs2_file_release(struct inode *inode, struct file *file)
 {
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 
-	mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file,
-		   file->f_path.dentry->d_name.len,
-		   file->f_path.dentry->d_name.name);
-
 	spin_lock(&oi->ip_lock);
 	if (!--oi->ip_open_count)
 		oi->ip_flags &= ~OCFS2_INODE_OPEN_DIRECT;
+
+	trace_ocfs2_file_release(inode, file, file->f_path.dentry,
+				 oi->ip_blkno,
+				 file->f_path.dentry->d_name.len,
+				 file->f_path.dentry->d_name.name,
+				 oi->ip_open_count);
 	spin_unlock(&oi->ip_lock);
 
 	ocfs2_free_file_private(inode, file);
 
-	mlog_exit(0);
-
 	return 0;
 }
 
@@ -177,9 +178,11 @@ static int ocfs2_sync_file(struct file *file, int datasync)
 	struct inode *inode = file->f_mapping->host;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 
-	mlog_entry("(0x%p, %d, 0x%p, '%.*s')\n", file, datasync,
-		   file->f_path.dentry, file->f_path.dentry->d_name.len,
-		   file->f_path.dentry->d_name.name);
+	trace_ocfs2_sync_file(inode, file, file->f_path.dentry,
+			      OCFS2_I(inode)->ip_blkno,
+			      file->f_path.dentry->d_name.len,
+			      file->f_path.dentry->d_name.name,
+			      (unsigned long long)datasync);
 
 	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) {
 		/*
@@ -195,7 +198,8 @@ static int ocfs2_sync_file(struct file *file, int datasync)
 	err = jbd2_journal_force_commit(journal);
 
 bail:
-	mlog_exit(err);
+	if (err)
+		mlog_errno(err);
 
 	return (err < 0) ? -EIO : 0;
 }
@@ -251,8 +255,6 @@ int ocfs2_update_inode_atime(struct inode *inode,
 	handle_t *handle;
 	struct ocfs2_dinode *di = (struct ocfs2_dinode *) bh->b_data;
 
-	mlog_entry_void();
-
 	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
@@ -280,7 +282,6 @@ int ocfs2_update_inode_atime(struct inode *inode,
 out_commit:
 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
 out:
-	mlog_exit(ret);
 	return ret;
 }
 
@@ -291,7 +292,6 @@ static int ocfs2_set_inode_size(handle_t *handle,
 {
 	int status;
 
-	mlog_entry_void();
 	i_size_write(inode, new_i_size);
 	inode->i_blocks = ocfs2_inode_sector_count(inode);
 	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
@@ -303,7 +303,6 @@ static int ocfs2_set_inode_size(handle_t *handle,
 	}
 
 bail:
-	mlog_exit(status);
 	return status;
 }
 
@@ -375,8 +374,6 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
 	struct ocfs2_dinode *di;
 	u64 cluster_bytes;
 
-	mlog_entry_void();
-
 	/*
 	 * We need to CoW the cluster contains the offset if it is reflinked
 	 * since we will call ocfs2_zero_range_for_truncate later which will
@@ -429,8 +426,6 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
 out_commit:
 	ocfs2_commit_trans(osb, handle);
 out:
-
-	mlog_exit(status);
 	return status;
 }
 
@@ -442,14 +437,14 @@ static int ocfs2_truncate_file(struct inode *inode,
 	struct ocfs2_dinode *fe = NULL;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 
-	mlog_entry("(inode = %llu, new_i_size = %llu\n",
-		   (unsigned long long)OCFS2_I(inode)->ip_blkno,
-		   (unsigned long long)new_i_size);
-
 	/* We trust di_bh because it comes from ocfs2_inode_lock(), which
 	 * already validated it */
 	fe = (struct ocfs2_dinode *) di_bh->b_data;
 
+	trace_ocfs2_truncate_file((unsigned long long)OCFS2_I(inode)->ip_blkno,
+				  (unsigned long long)le64_to_cpu(fe->i_size),
+				  (unsigned long long)new_i_size);
+
 	mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode),
 			"Inode %llu, inode i_size = %lld != di "
 			"i_size = %llu, i_flags = 0x%x\n",
@@ -459,19 +454,14 @@ static int ocfs2_truncate_file(struct inode *inode,
 			le32_to_cpu(fe->i_flags));
 
 	if (new_i_size > le64_to_cpu(fe->i_size)) {
-		mlog(0, "asked to truncate file with size (%llu) to size (%llu)!\n",
-		     (unsigned long long)le64_to_cpu(fe->i_size),
-		     (unsigned long long)new_i_size);
+		trace_ocfs2_truncate_file_error(
+			(unsigned long long)le64_to_cpu(fe->i_size),
+			(unsigned long long)new_i_size);
 		status = -EINVAL;
 		mlog_errno(status);
 		goto bail;
 	}
 
-	mlog(0, "inode %llu, i_size = %llu, new_i_size = %llu\n",
-	     (unsigned long long)le64_to_cpu(fe->i_blkno),
-	     (unsigned long long)le64_to_cpu(fe->i_size),
-	     (unsigned long long)new_i_size);
-
 	/* lets handle the simple truncate cases before doing any more
 	 * cluster locking. */
 	if (new_i_size == le64_to_cpu(fe->i_size))
@@ -525,7 +515,6 @@ bail:
 	if (!status && OCFS2_I(inode)->ip_clusters == 0)
 		status = ocfs2_try_remove_refcount_tree(inode, di_bh);
 
-	mlog_exit(status);
 	return status;
 }
 
@@ -578,8 +567,6 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
 	struct ocfs2_extent_tree et;
 	int did_quota = 0;
 
-	mlog_entry("(clusters_to_add = %u)\n", clusters_to_add);
-
 	/*
 	 * This function only exists for file systems which don't
 	 * support holes.
@@ -596,11 +583,6 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
 restart_all:
 	BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
 
-	mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, "
-	     "clusters_to_add = %u\n",
-	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
-	     (long long)i_size_read(inode), le32_to_cpu(fe->i_clusters),
-	     clusters_to_add);
 	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), bh);
 	status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
 				       &data_ac, &meta_ac);
@@ -620,6 +602,12 @@ restart_all:
 	}
 
 restarted_transaction:
+	trace_ocfs2_extend_allocation(
+		(unsigned long long)OCFS2_I(inode)->ip_blkno,
+		(unsigned long long)i_size_read(inode),
+		le32_to_cpu(fe->i_clusters), clusters_to_add,
+		why, restart_func);
+
 	status = dquot_alloc_space_nodirty(inode,
 			ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
 	if (status)
@@ -666,13 +654,11 @@ restarted_transaction:
 
 	if (why != RESTART_NONE && clusters_to_add) {
 		if (why == RESTART_META) {
-			mlog(0, "restarting function.\n");
 			restart_func = 1;
 			status = 0;
 		} else {
 			BUG_ON(why != RESTART_TRANS);
 
-			mlog(0, "restarting transaction.\n");
 			/* TODO: This can be more intelligent. */
 			credits = ocfs2_calc_extend_credits(osb->sb,
 							    &fe->id2.i_list,
@@ -689,11 +675,11 @@ restarted_transaction:
 		}
 	}
 
-	mlog(0, "fe: i_clusters = %u, i_size=%llu\n",
-	     le32_to_cpu(fe->i_clusters),
-	     (unsigned long long)le64_to_cpu(fe->i_size));
-	mlog(0, "inode: ip_clusters=%u, i_size=%lld\n",
-	     OCFS2_I(inode)->ip_clusters, (long long)i_size_read(inode));
+	trace_ocfs2_extend_allocation_end(OCFS2_I(inode)->ip_blkno,
+					  le32_to_cpu(fe->i_clusters),
+					  (unsigned long long)le64_to_cpu(fe->i_size),
+					  OCFS2_I(inode)->ip_clusters,
+					  (unsigned long long)i_size_read(inode));
 
 leave:
 	if (status < 0 && did_quota)
@@ -718,7 +704,6 @@ leave:
 	brelse(bh);
 	bh = NULL;
 
-	mlog_exit(status);
 	return status;
 }
 
@@ -785,10 +770,11 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
 	if (!zero_to)
 		zero_to = PAGE_CACHE_SIZE;
 
-	mlog(0,
-	     "abs_from = %llu, abs_to = %llu, index = %lu, zero_from = %u, zero_to = %u\n",
-	     (unsigned long long)abs_from, (unsigned long long)abs_to,
-	     index, zero_from, zero_to);
+	trace_ocfs2_write_zero_page(
+			(unsigned long long)OCFS2_I(inode)->ip_blkno,
+			(unsigned long long)abs_from,
+			(unsigned long long)abs_to,
+			index, zero_from, zero_to);
 
 	/* We know that zero_from is block aligned */
 	for (block_start = zero_from; block_start < zero_to;
@@ -928,9 +914,10 @@ static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
 	u64 next_pos;
 	u64 zero_pos = range_start;
 
-	mlog(0, "range_start = %llu, range_end = %llu\n",
-	     (unsigned long long)range_start,
-	     (unsigned long long)range_end);
+	trace_ocfs2_zero_extend_range(
+			(unsigned long long)OCFS2_I(inode)->ip_blkno,
+			(unsigned long long)range_start,
+			(unsigned long long)range_end);
 	BUG_ON(range_start >= range_end);
 
 	while (zero_pos < range_end) {
@@ -962,9 +949,9 @@ int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
 	struct super_block *sb = inode->i_sb;
 
 	zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
-	mlog(0, "zero_start %llu for i_size %llu\n",
-	     (unsigned long long)zero_start,
-	     (unsigned long long)i_size_read(inode));
+	trace_ocfs2_zero_extend((unsigned long long)OCFS2_I(inode)->ip_blkno,
+				(unsigned long long)zero_start,
+				(unsigned long long)i_size_read(inode));
 	while (zero_start < zero_to_size) {
 		ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start,
 						  zero_to_size,
@@ -1113,30 +1100,20 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
 	struct dquot *transfer_to[MAXQUOTAS] = { };
 	int qtype;
 
-	mlog_entry("(0x%p, '%.*s')\n", dentry,
-		   dentry->d_name.len, dentry->d_name.name);
+	trace_ocfs2_setattr(inode, dentry,
+			    (unsigned long long)OCFS2_I(inode)->ip_blkno,
+			    dentry->d_name.len, dentry->d_name.name,
+			    attr->ia_valid, attr->ia_mode,
+			    attr->ia_uid, attr->ia_gid);
 
 	/* ensuring we don't even attempt to truncate a symlink */
 	if (S_ISLNK(inode->i_mode))
 		attr->ia_valid &= ~ATTR_SIZE;
 
-	if (attr->ia_valid & ATTR_MODE)
-		mlog(0, "mode change: %d\n", attr->ia_mode);
-	if (attr->ia_valid & ATTR_UID)
-		mlog(0, "uid change: %d\n", attr->ia_uid);
-	if (attr->ia_valid & ATTR_GID)
-		mlog(0, "gid change: %d\n", attr->ia_gid);
-	if (attr->ia_valid & ATTR_SIZE)
-		mlog(0, "size change...\n");
-	if (attr->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME))
-		mlog(0, "time change...\n");
-
 #define OCFS2_VALID_ATTRS (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_SIZE \
 			   | ATTR_GID | ATTR_UID | ATTR_MODE)
-	if (!(attr->ia_valid & OCFS2_VALID_ATTRS)) {
-		mlog(0, "can't handle attrs: 0x%x\n", attr->ia_valid);
+	if (!(attr->ia_valid & OCFS2_VALID_ATTRS))
 		return 0;
-	}
 
 	status = inode_change_ok(inode, attr);
 	if (status)
@@ -1274,7 +1251,6 @@ bail:
 		mlog_errno(status);
 	}
 
-	mlog_exit(status);
 	return status;
 }
 
@@ -1287,8 +1263,6 @@ int ocfs2_getattr(struct vfsmount *mnt,
 	struct ocfs2_super *osb = sb->s_fs_info;
 	int err;
 
-	mlog_entry_void();
-
 	err = ocfs2_inode_revalidate(dentry);
 	if (err) {
 		if (err != -ENOENT)
@@ -1302,8 +1276,6 @@ int ocfs2_getattr(struct vfsmount *mnt,
 	stat->blksize = osb->s_clustersize;
 
 bail:
-	mlog_exit(err);
-
 	return err;
 }
 
@@ -1314,8 +1286,6 @@ int ocfs2_permission(struct inode *inode, int mask, unsigned int flags)
 	if (flags & IPERM_FLAG_RCU)
 		return -ECHILD;
 
-	mlog_entry_void();
-
 	ret = ocfs2_inode_lock(inode, NULL, 0);
 	if (ret) {
 		if (ret != -ENOENT)
@@ -1327,7 +1297,6 @@ int ocfs2_permission(struct inode *inode, int mask, unsigned int flags)
 
 	ocfs2_inode_unlock(inode, 0);
 out:
-	mlog_exit(ret);
 	return ret;
 }
 
@@ -1339,8 +1308,9 @@ static int __ocfs2_write_remove_suid(struct inode *inode,
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct ocfs2_dinode *di;
 
-	mlog_entry("(Inode %llu, mode 0%o)\n",
-		   (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_mode);
+	trace_ocfs2_write_remove_suid(
+			(unsigned long long)OCFS2_I(inode)->ip_blkno,
+			inode->i_mode);
 
 	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
 	if (IS_ERR(handle)) {
@@ -1368,7 +1338,6 @@ static int __ocfs2_write_remove_suid(struct inode *inode,
 out_trans:
 	ocfs2_commit_trans(osb, handle);
 out:
-	mlog_exit(ret);
 	return ret;
 }
 
@@ -1547,8 +1516,9 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
 	 * partial clusters here. There's no need to worry about
 	 * physical allocation - the zeroing code knows to skip holes.
 	 */
-	mlog(0, "byte start: %llu, end: %llu\n",
-	     (unsigned long long)start, (unsigned long long)end);
+	trace_ocfs2_zero_partial_clusters(
+		(unsigned long long)OCFS2_I(inode)->ip_blkno,
+		(unsigned long long)start, (unsigned long long)end);
 
 	/*
 	 * If both edges are on a cluster boundary then there's no
@@ -1572,8 +1542,8 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
 	if (tmpend > end)
 		tmpend = end;
 
-	mlog(0, "1st range: start: %llu, tmpend: %llu\n",
-	     (unsigned long long)start, (unsigned long long)tmpend);
+	trace_ocfs2_zero_partial_clusters_range1((unsigned long long)start,
						 (unsigned long long)tmpend);
 
 	ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend);
 	if (ret)
@@ -1587,8 +1557,8 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
 	 */
 	start = end & ~(osb->s_clustersize - 1);
 
-	mlog(0, "2nd range: start: %llu, end: %llu\n",
-	     (unsigned long long)start, (unsigned long long)end);
+	trace_ocfs2_zero_partial_clusters_range2(
+		(unsigned long long)start, (unsigned long long)end);
 
 	ret = ocfs2_zero_range_for_truncate(inode, handle, start, end);
 	if (ret)
@@ -1688,6 +1658,11 @@ static int ocfs2_remove_inode_range(struct inode *inode,
 	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
 	ocfs2_init_dealloc_ctxt(&dealloc);
 
+	trace_ocfs2_remove_inode_range(
+			(unsigned long long)OCFS2_I(inode)->ip_blkno,
+			(unsigned long long)byte_start,
+			(unsigned long long)byte_len);
+
 	if (byte_len == 0)
 		return 0;
 
@@ -1734,11 +1709,6 @@ static int ocfs2_remove_inode_range(struct inode *inode,
 	trunc_end = (byte_start + byte_len) >> osb->s_clustersize_bits;
 	cluster_in_el = trunc_end;
 
-	mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, cend: %u\n",
-	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
-	     (unsigned long long)byte_start,
-	     (unsigned long long)byte_len, trunc_start, trunc_end);
-
 	ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len);
 	if (ret) {
 		mlog_errno(ret);
@@ -2093,7 +2063,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
 	int ret = 0, meta_level = 0;
 	struct dentry *dentry = file->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
-	loff_t saved_pos, end;
+	loff_t saved_pos = 0, end;
 
 	/*
 	 * We start with a read level meta lock and only jump to an ex
@@ -2132,12 +2102,10 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
 
 	/* work on a copy of ppos until we're sure that we won't have
 	 * to recalculate it due to relocking. */
-	if (appending) {
+	if (appending)
 		saved_pos = i_size_read(inode);
-		mlog(0, "O_APPEND: inode->i_size=%llu\n", saved_pos);
-	} else {
+	else
 		saved_pos = *ppos;
-	}
 
 	end = saved_pos + count;
 
@@ -2208,6 +2176,10 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
 		*ppos = saved_pos;
 
 out_unlock:
+	trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno,
+					    saved_pos, appending, count,
+					    direct_io, has_refcount);
+
 	if (meta_level >= 0)
 		ocfs2_inode_unlock(inode, meta_level);
 
@@ -2233,10 +2205,11 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 	int full_coherency = !(osb->s_mount_opt &
 			       OCFS2_MOUNT_COHERENCY_BUFFERED);
 
-	mlog_entry("(0x%p, %u, '%.*s')\n", file,
-		   (unsigned int)nr_segs,
-		   file->f_path.dentry->d_name.len,
-		   file->f_path.dentry->d_name.name);
+	trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry,
+		(unsigned long long)OCFS2_I(inode)->ip_blkno,
+		file->f_path.dentry->d_name.len,
+		file->f_path.dentry->d_name.name,
+		(unsigned int)nr_segs);
 
 	if (iocb->ki_left == 0)
 		return 0;
@@ -2402,7 +2375,6 @@ out_sems:
 
 	if (written)
 		ret = written;
-	mlog_exit(ret);
 	return ret;
 }
 
@@ -2438,10 +2410,11 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
 		.u.file = out,
 	};
 
-	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe,
-		   (unsigned int)len,
-		   out->f_path.dentry->d_name.len,
-		   out->f_path.dentry->d_name.name);
+
+	trace_ocfs2_file_splice_write(inode, out, out->f_path.dentry,
+			(unsigned long long)OCFS2_I(inode)->ip_blkno,
+			out->f_path.dentry->d_name.len,
+			out->f_path.dentry->d_name.name, len);
 
 	if (pipe->inode)
 		mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT);
@@ -2485,7 +2458,6 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
 		balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
 	}
 
-	mlog_exit(ret);
 	return ret;
 }
 
@@ -2498,10 +2470,10 @@ static ssize_t ocfs2_file_splice_read(struct file *in,
 	int ret = 0, lock_level = 0;
 	struct inode *inode = in->f_path.dentry->d_inode;
 
-	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe,
-		   (unsigned int)len,
-		   in->f_path.dentry->d_name.len,
-		   in->f_path.dentry->d_name.name);
+	trace_ocfs2_file_splice_read(inode, in, in->f_path.dentry,
+			(unsigned long long)OCFS2_I(inode)->ip_blkno,
+			in->f_path.dentry->d_name.len,
+			in->f_path.dentry->d_name.name, len);
 
 	/*
 	 * See the comment in ocfs2_file_aio_read()
@@ -2516,7 +2488,6 @@ static ssize_t ocfs2_file_splice_read(struct file *in,
 	ret = generic_file_splice_read(in, ppos, pipe, len, flags);
 
 bail:
-	mlog_exit(ret);
 	return ret;
 }
 
@@ -2529,10 +2500,11 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
 	struct file *filp = iocb->ki_filp;
 	struct inode *inode = filp->f_path.dentry->d_inode;
 
-	mlog_entry("(0x%p, %u, '%.*s')\n", filp,
-		   (unsigned int)nr_segs,
-		   filp->f_path.dentry->d_name.len,
-		   filp->f_path.dentry->d_name.name);
+	trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry,
+			(unsigned long long)OCFS2_I(inode)->ip_blkno,
+			filp->f_path.dentry->d_name.len,
+			filp->f_path.dentry->d_name.name, nr_segs);
+
 
 	if (!inode) {
 		ret = -EINVAL;
@@ -2578,8 +2550,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
 	ocfs2_inode_unlock(inode, lock_level);
 
 	ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos);
-	if (ret == -EINVAL)
-		mlog(0, "generic_file_aio_read returned -EINVAL\n");
+	trace_generic_file_aio_read_ret(ret);
 
 	/* buffered aio wouldn't have proper lock coverage today */
 	BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT));
@@ -2597,7 +2568,6 @@ bail:
 	}
 	if (rw_level != -1)
 		ocfs2_rw_unlock(inode, rw_level);
-	mlog_exit(ret);
 
 	return ret;
 }
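One behavioural detail in the ocfs2_prepare_inode_for_write() hunks is worth noting: the new trace_ocfs2_prepare_inode_for_write() call sits at the out_unlock: label, which the function's early error paths apparently reach before saved_pos is ever assigned, so the declaration gains a "= 0" initializer. A reduced, self-contained sketch of the hazard the initializer closes (not the ocfs2 code itself):

/* Illustration only: a value logged at a common exit label must be
 * initialized, because early "goto out" paths can reach the label
 * before the normal-path assignment runs. */
static int prepare(int fail_early, long long *pos_out)
{
	long long saved_pos = 0;	/* without this, the read at "out:"
					 * could see an indeterminate value */

	if (fail_early)
		goto out;

	saved_pos = 4096;		/* the normal path assigns it later */
out:
	*pos_out = saved_pos;		/* traced/logged on every exit */
	return fail_early ? -1 : 0;
}

The same pattern explains why the tracepoint was placed after the label rather than next to the assignments: one call site then covers every exit from the function.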
diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c
index 1aa863dd901f..d8208b20dc53 100644
--- a/fs/ocfs2/heartbeat.c
+++ b/fs/ocfs2/heartbeat.c
@@ -28,7 +28,6 @@
 #include <linux/types.h>
 #include <linux/highmem.h>
 
-#define MLOG_MASK_PREFIX ML_SUPER
 #include <cluster/masklog.h>
 
 #include "ocfs2.h"
@@ -37,6 +36,7 @@
 #include "heartbeat.h"
 #include "inode.h"
 #include "journal.h"
+#include "ocfs2_trace.h"
 
 #include "buffer_head_io.h"
 
@@ -66,7 +66,7 @@ void ocfs2_do_node_down(int node_num, void *data)
 
 	BUG_ON(osb->node_num == node_num);
 
-	mlog(0, "ocfs2: node down event for %d\n", node_num);
+	trace_ocfs2_do_node_down(node_num);
 
 	if (!osb->cconn) {
 		/*
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 4068c6c4c6f6..b4c8bb6b8d28 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -31,7 +31,6 @@
 
 #include <asm/byteorder.h>
 
-#define MLOG_MASK_PREFIX ML_INODE
 #include <cluster/masklog.h>
 
 #include "ocfs2.h"
@@ -53,6 +52,7 @@
 #include "uptodate.h"
 #include "xattr.h"
 #include "refcounttree.h"
+#include "ocfs2_trace.h"
 
 #include "buffer_head_io.h"
 
@@ -131,7 +131,8 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
 	struct super_block *sb = osb->sb;
 	struct ocfs2_find_inode_args args;
 
-	mlog_entry("(blkno = %llu)\n", (unsigned long long)blkno);
+	trace_ocfs2_iget_begin((unsigned long long)blkno, flags,
+			       sysfile_type);
 
 	/* Ok. By now we've either got the offsets passed to us by the
 	 * caller, or we just pulled them off the bh. Lets do some
@@ -152,16 +153,16 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
 	/* inode was *not* in the inode cache. 2.6.x requires
 	 * us to do our own read_inode call and unlock it
 	 * afterwards. */
-	if (inode && inode->i_state & I_NEW) {
-		mlog(0, "Inode was not in inode cache, reading it.\n");
-		ocfs2_read_locked_inode(inode, &args);
-		unlock_new_inode(inode);
-	}
 	if (inode == NULL) {
 		inode = ERR_PTR(-ENOMEM);
 		mlog_errno(PTR_ERR(inode));
 		goto bail;
 	}
+	trace_ocfs2_iget5_locked(inode->i_state);
+	if (inode->i_state & I_NEW) {
+		ocfs2_read_locked_inode(inode, &args);
+		unlock_new_inode(inode);
+	}
 	if (is_bad_inode(inode)) {
 		iput(inode);
 		inode = ERR_PTR(-ESTALE);
@@ -170,9 +171,8 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
 
 bail:
 	if (!IS_ERR(inode)) {
-		mlog(0, "returning inode with number %llu\n",
-		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
-		mlog_exit_ptr(inode);
+		trace_ocfs2_iget_end(inode,
+			(unsigned long long)OCFS2_I(inode)->ip_blkno);
 	}
 
 	return inode;
@@ -192,18 +192,17 @@ static int ocfs2_find_actor(struct inode *inode, void *opaque)
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 	int ret = 0;
 
-	mlog_entry("(0x%p, %lu, 0x%p)\n", inode, inode->i_ino, opaque);
-
 	args = opaque;
 
 	mlog_bug_on_msg(!inode, "No inode in find actor!\n");
 
+	trace_ocfs2_find_actor(inode, inode->i_ino, opaque, args->fi_blkno);
+
 	if (oi->ip_blkno != args->fi_blkno)
 		goto bail;
 
 	ret = 1;
 bail:
-	mlog_exit(ret);
 	return ret;
 }
 
@@ -218,8 +217,6 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
 	static struct lock_class_key ocfs2_quota_ip_alloc_sem_key,
 				     ocfs2_file_ip_alloc_sem_key;
 
-	mlog_entry("inode = %p, opaque = %p\n", inode, opaque);
-
 	inode->i_ino = args->fi_ino;
 	OCFS2_I(inode)->ip_blkno = args->fi_blkno;
 	if (args->fi_sysfile_type != 0)
@@ -235,7 +232,6 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
 	lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem,
 			  &ocfs2_file_ip_alloc_sem_key);
 
-	mlog_exit(0);
 	return 0;
 }
 
@@ -246,9 +242,6 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 	struct ocfs2_super *osb;
 	int use_plocks = 1;
 
-	mlog_entry("(0x%p, size:%llu)\n", inode,
-		   (unsigned long long)le64_to_cpu(fe->i_size));
-
 	sb = inode->i_sb;
 	osb = OCFS2_SB(sb);
 
@@ -300,20 +293,20 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 
 	inode->i_nlink = ocfs2_read_links_count(fe);
 
+	trace_ocfs2_populate_inode(OCFS2_I(inode)->ip_blkno,
+				   le32_to_cpu(fe->i_flags));
 	if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) {
 		OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
 		inode->i_flags |= S_NOQUOTA;
 	}
 
 	if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) {
 		OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP;
-		mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino);
 	} else if (fe->i_flags & cpu_to_le32(OCFS2_BITMAP_FL)) {
 		OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP;
 	} else if (fe->i_flags & cpu_to_le32(OCFS2_QUOTA_FL)) {
 		inode->i_flags |= S_NOQUOTA;
 	} else if (fe->i_flags & cpu_to_le32(OCFS2_SUPER_BLOCK_FL)) {
-		mlog(0, "superblock inode: i_ino=%lu\n", inode->i_ino);
 		/* we can't actually hit this as read_inode can't
 		 * handle superblocks today ;-) */
 		BUG();
@@ -381,7 +374,6 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 	if (S_ISDIR(inode->i_mode))
 		ocfs2_resv_set_type(&OCFS2_I(inode)->ip_la_data_resv,
 				    OCFS2_RESV_FLAG_DIR);
-	mlog_exit_void();
 }
 
 static int ocfs2_read_locked_inode(struct inode *inode,
@@ -394,8 +386,6 @@ static int ocfs2_read_locked_inode(struct inode *inode,
 	int status, can_lock;
 	u32 generation = 0;
 
-	mlog_entry("(0x%p, 0x%p)\n", inode, args);
-
 	status = -EINVAL;
 	if (inode == NULL || inode->i_sb == NULL) {
 		mlog(ML_ERROR, "bad inode\n");
@@ -443,6 +433,9 @@ static int ocfs2_read_locked_inode(struct inode *inode,
 		&& !(args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY)
 		&& !ocfs2_mount_local(osb);
 
+	trace_ocfs2_read_locked_inode(
+		(unsigned long long)OCFS2_I(inode)->ip_blkno, can_lock);
+
 	/*
 	 * To maintain backwards compatibility with older versions of
 	 * ocfs2-tools, we still store the generation value for system
@@ -534,7 +527,6 @@ bail:
 	if (args && bh)
 		brelse(bh);
 
-	mlog_exit(status);
 	return status;
 }
 
@@ -551,8 +543,6 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
 	struct ocfs2_dinode *fe;
 	handle_t *handle = NULL;
 
-	mlog_entry_void();
-
 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
 
 	/*
@@ -600,7 +590,6 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
 out:
 	if (handle)
 		ocfs2_commit_trans(osb, handle);
-	mlog_exit(status);
 	return status;
 }
 
@@ -696,8 +685,6 @@ static int ocfs2_check_orphan_recovery_state(struct ocfs2_super *osb,
 
 	spin_lock(&osb->osb_lock);
 	if (ocfs2_node_map_test_bit(osb, &osb->osb_recovering_orphan_dirs, slot)) {
-		mlog(0, "Recovery is happening on orphan dir %d, will skip "
-		     "this inode\n", slot);
 		ret = -EDEADLK;
 		goto out;
 	}
@@ -706,6 +693,7 @@ static int ocfs2_check_orphan_recovery_state(struct ocfs2_super *osb,
 	osb->osb_orphan_wipes[slot]++;
 out:
 	spin_unlock(&osb->osb_lock);
+	trace_ocfs2_check_orphan_recovery_state(slot, ret);
 	return ret;
 }
 
@@ -816,6 +804,10 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode)
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 
+	trace_ocfs2_inode_is_valid_to_delete(current, osb->dc_task,
+					     (unsigned long long)oi->ip_blkno,
+					     oi->ip_flags);
+
 	/* We shouldn't be getting here for the root directory
 	 * inode.. */
 	if (inode == osb->root_inode) {
@@ -828,11 +820,8 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode)
828 * have to skip deleting this guy. That's OK though because 820 * have to skip deleting this guy. That's OK though because
829 * the node who's doing the actual deleting should handle it 821 * the node who's doing the actual deleting should handle it
830 * anyway. */ 822 * anyway. */
831 if (current == osb->dc_task) { 823 if (current == osb->dc_task)
832 mlog(0, "Skipping delete of %lu because we're currently "
833 "in downconvert\n", inode->i_ino);
834 goto bail; 824 goto bail;
835 }
836 825
837 spin_lock(&oi->ip_lock); 826 spin_lock(&oi->ip_lock);
838 /* OCFS2 *never* deletes system files. This should technically 827 /* OCFS2 *never* deletes system files. This should technically
@@ -846,12 +835,9 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode)
846 835
847 /* If we have allowed wipe of this inode for another node, it 836 /* If we have allowed wipe of this inode for another node, it
848 * will be marked here so we can safely skip it. Recovery will 837 * will be marked here so we can safely skip it. Recovery will
849 * cleanup any inodes we might inadvertantly skip here. */ 838 * cleanup any inodes we might inadvertently skip here. */
850 if (oi->ip_flags & OCFS2_INODE_SKIP_DELETE) { 839 if (oi->ip_flags & OCFS2_INODE_SKIP_DELETE)
851 mlog(0, "Skipping delete of %lu because another node "
852 "has done this for us.\n", inode->i_ino);
853 goto bail_unlock; 840 goto bail_unlock;
854 }
855 841
856 ret = 1; 842 ret = 1;
857bail_unlock: 843bail_unlock:
@@ -868,28 +854,27 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
868 struct buffer_head *di_bh, 854 struct buffer_head *di_bh,
869 int *wipe) 855 int *wipe)
870{ 856{
871 int status = 0; 857 int status = 0, reason = 0;
872 struct ocfs2_inode_info *oi = OCFS2_I(inode); 858 struct ocfs2_inode_info *oi = OCFS2_I(inode);
873 struct ocfs2_dinode *di; 859 struct ocfs2_dinode *di;
874 860
875 *wipe = 0; 861 *wipe = 0;
876 862
863 trace_ocfs2_query_inode_wipe_begin((unsigned long long)oi->ip_blkno,
864 inode->i_nlink);
865
877 /* While we were waiting for the cluster lock in 866 /* While we were waiting for the cluster lock in
878 * ocfs2_delete_inode, another node might have asked to delete 867 * ocfs2_delete_inode, another node might have asked to delete
879 * the inode. Recheck our flags to catch this. */ 868 * the inode. Recheck our flags to catch this. */
880 if (!ocfs2_inode_is_valid_to_delete(inode)) { 869 if (!ocfs2_inode_is_valid_to_delete(inode)) {
881 mlog(0, "Skipping delete of %llu because flags changed\n", 870 reason = 1;
882 (unsigned long long)oi->ip_blkno);
883 goto bail; 871 goto bail;
884 } 872 }
885 873
886 /* Now that we have an up to date inode, we can double check 874 /* Now that we have an up to date inode, we can double check
887 * the link count. */ 875 * the link count. */
888 if (inode->i_nlink) { 876 if (inode->i_nlink)
889 mlog(0, "Skipping delete of %llu because nlink = %u\n",
890 (unsigned long long)oi->ip_blkno, inode->i_nlink);
891 goto bail; 877 goto bail;
892 }
893 878
894 /* Do some basic inode verification... */ 879 /* Do some basic inode verification... */
895 di = (struct ocfs2_dinode *) di_bh->b_data; 880 di = (struct ocfs2_dinode *) di_bh->b_data;
@@ -904,9 +889,7 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
904 * ORPHANED_FL not. 889 * ORPHANED_FL not.
905 */ 890 */
906 if (di->i_dyn_features & cpu_to_le16(OCFS2_HAS_REFCOUNT_FL)) { 891 if (di->i_dyn_features & cpu_to_le16(OCFS2_HAS_REFCOUNT_FL)) {
907 mlog(0, "Reflinked inode %llu is no longer orphaned. " 892 reason = 2;
908 "it shouldn't be deleted\n",
909 (unsigned long long)oi->ip_blkno);
910 goto bail; 893 goto bail;
911 } 894 }
912 895
@@ -934,7 +917,7 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
934 * the inode open lock in ocfs2_read_locked_inode(). When we 917 * the inode open lock in ocfs2_read_locked_inode(). When we
935 * get to ->delete_inode(), each node tries to convert its 918 * get to ->delete_inode(), each node tries to convert its
936 * lock to an exclusive. Trylocks are serialized by the inode 919 * lock to an exclusive. Trylocks are serialized by the inode
937 * meta data lock. If the upconvert suceeds, we know the inode 920 * meta data lock. If the upconvert succeeds, we know the inode
938 * is no longer live and can be deleted. 921 * is no longer live and can be deleted.
939 * 922 *
940 * Though we call this with the meta data lock held, the 923 * Though we call this with the meta data lock held, the
@@ -943,8 +926,7 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
943 status = ocfs2_try_open_lock(inode, 1); 926 status = ocfs2_try_open_lock(inode, 1);
944 if (status == -EAGAIN) { 927 if (status == -EAGAIN) {
945 status = 0; 928 status = 0;
946 mlog(0, "Skipping delete of %llu because it is in use on " 929 reason = 3;
947 "other nodes\n", (unsigned long long)oi->ip_blkno);
948 goto bail; 930 goto bail;
949 } 931 }
950 if (status < 0) { 932 if (status < 0) {
@@ -953,11 +935,10 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
953 } 935 }
954 936
955 *wipe = 1; 937 *wipe = 1;
956 mlog(0, "Inode %llu is ok to wipe from orphan dir %u\n", 938 trace_ocfs2_query_inode_wipe_succ(le16_to_cpu(di->i_orphaned_slot));
957 (unsigned long long)oi->ip_blkno,
958 le16_to_cpu(di->i_orphaned_slot));
959 939
960bail: 940bail:
941 trace_ocfs2_query_inode_wipe_end(status, reason);
961 return status; 942 return status;
962} 943}
963 944
@@ -967,8 +948,8 @@ bail:
967static void ocfs2_cleanup_delete_inode(struct inode *inode, 948static void ocfs2_cleanup_delete_inode(struct inode *inode,
968 int sync_data) 949 int sync_data)
969{ 950{
970 mlog(0, "Cleanup inode %llu, sync = %d\n", 951 trace_ocfs2_cleanup_delete_inode(
971 (unsigned long long)OCFS2_I(inode)->ip_blkno, sync_data); 952 (unsigned long long)OCFS2_I(inode)->ip_blkno, sync_data);
972 if (sync_data) 953 if (sync_data)
973 write_inode_now(inode, 1); 954 write_inode_now(inode, 1);
974 truncate_inode_pages(&inode->i_data, 0); 955 truncate_inode_pages(&inode->i_data, 0);
@@ -980,15 +961,15 @@ static void ocfs2_delete_inode(struct inode *inode)
980 sigset_t oldset; 961 sigset_t oldset;
981 struct buffer_head *di_bh = NULL; 962 struct buffer_head *di_bh = NULL;
982 963
983 mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); 964 trace_ocfs2_delete_inode(inode->i_ino,
965 (unsigned long long)OCFS2_I(inode)->ip_blkno,
966 is_bad_inode(inode));
984 967
985 /* When we fail in read_inode() we mark the inode as bad. The second test 968 /* When we fail in read_inode() we mark the inode as bad. The second test
986 * catches the case when inode allocation fails before allocating 969 * catches the case when inode allocation fails before allocating
987 * a block for inode. */ 970 * a block for inode. */
988 if (is_bad_inode(inode) || !OCFS2_I(inode)->ip_blkno) { 971 if (is_bad_inode(inode) || !OCFS2_I(inode)->ip_blkno)
989 mlog(0, "Skipping delete of bad inode\n");
990 goto bail; 972 goto bail;
991 }
992 973
993 dquot_initialize(inode); 974 dquot_initialize(inode);
994 975
@@ -1080,7 +1061,7 @@ bail_unlock_nfs_sync:
1080bail_unblock: 1061bail_unblock:
1081 ocfs2_unblock_signals(&oldset); 1062 ocfs2_unblock_signals(&oldset);
1082bail: 1063bail:
1083 mlog_exit_void(); 1064 return;
1084} 1065}
1085 1066
1086static void ocfs2_clear_inode(struct inode *inode) 1067static void ocfs2_clear_inode(struct inode *inode)
@@ -1088,11 +1069,9 @@ static void ocfs2_clear_inode(struct inode *inode)
1088 int status; 1069 int status;
1089 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1070 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1090 1071
1091 mlog_entry_void();
1092
1093 end_writeback(inode); 1072 end_writeback(inode);
1094 mlog(0, "Clearing inode: %llu, nlink = %u\n", 1073 trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno,
1095 (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_nlink); 1074 inode->i_nlink);
1096 1075
1097 mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL, 1076 mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL,
1098 "Inode=%lu\n", inode->i_ino); 1077 "Inode=%lu\n", inode->i_ino);
@@ -1181,8 +1160,6 @@ static void ocfs2_clear_inode(struct inode *inode)
1181 */ 1160 */
1182 jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal, 1161 jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal,
1183 &oi->ip_jinode); 1162 &oi->ip_jinode);
1184
1185 mlog_exit_void();
1186} 1163}
1187 1164
1188void ocfs2_evict_inode(struct inode *inode) 1165void ocfs2_evict_inode(struct inode *inode)
@@ -1204,17 +1181,14 @@ int ocfs2_drop_inode(struct inode *inode)
1204 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1181 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1205 int res; 1182 int res;
1206 1183
1207 mlog_entry_void(); 1184 trace_ocfs2_drop_inode((unsigned long long)oi->ip_blkno,
1208 1185 inode->i_nlink, oi->ip_flags);
1209 mlog(0, "Drop inode %llu, nlink = %u, ip_flags = 0x%x\n",
1210 (unsigned long long)oi->ip_blkno, inode->i_nlink, oi->ip_flags);
1211 1186
1212 if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) 1187 if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)
1213 res = 1; 1188 res = 1;
1214 else 1189 else
1215 res = generic_drop_inode(inode); 1190 res = generic_drop_inode(inode);
1216 1191
1217 mlog_exit_void();
1218 return res; 1192 return res;
1219} 1193}
1220 1194
@@ -1226,11 +1200,11 @@ int ocfs2_inode_revalidate(struct dentry *dentry)
1226 struct inode *inode = dentry->d_inode; 1200 struct inode *inode = dentry->d_inode;
1227 int status = 0; 1201 int status = 0;
1228 1202
1229 mlog_entry("(inode = 0x%p, ino = %llu)\n", inode, 1203 trace_ocfs2_inode_revalidate(inode,
1230 inode ? (unsigned long long)OCFS2_I(inode)->ip_blkno : 0ULL); 1204 inode ? (unsigned long long)OCFS2_I(inode)->ip_blkno : 0ULL,
1205 inode ? (unsigned long long)OCFS2_I(inode)->ip_flags : 0);
1231 1206
1232 if (!inode) { 1207 if (!inode) {
1233 mlog(0, "eep, no inode!\n");
1234 status = -ENOENT; 1208 status = -ENOENT;
1235 goto bail; 1209 goto bail;
1236 } 1210 }
@@ -1238,7 +1212,6 @@ int ocfs2_inode_revalidate(struct dentry *dentry)
1238 spin_lock(&OCFS2_I(inode)->ip_lock); 1212 spin_lock(&OCFS2_I(inode)->ip_lock);
1239 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) { 1213 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
1240 spin_unlock(&OCFS2_I(inode)->ip_lock); 1214 spin_unlock(&OCFS2_I(inode)->ip_lock);
1241 mlog(0, "inode deleted!\n");
1242 status = -ENOENT; 1215 status = -ENOENT;
1243 goto bail; 1216 goto bail;
1244 } 1217 }
@@ -1254,8 +1227,6 @@ int ocfs2_inode_revalidate(struct dentry *dentry)
1254 } 1227 }
1255 ocfs2_inode_unlock(inode, 0); 1228 ocfs2_inode_unlock(inode, 0);
1256bail: 1229bail:
1257 mlog_exit(status);
1258
1259 return status; 1230 return status;
1260} 1231}
1261 1232
@@ -1271,8 +1242,7 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
1271 int status; 1242 int status;
1272 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data; 1243 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
1273 1244
1274 mlog_entry("(inode %llu)\n", 1245 trace_ocfs2_mark_inode_dirty((unsigned long long)OCFS2_I(inode)->ip_blkno);
1275 (unsigned long long)OCFS2_I(inode)->ip_blkno);
1276 1246
1277 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh, 1247 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
1278 OCFS2_JOURNAL_ACCESS_WRITE); 1248 OCFS2_JOURNAL_ACCESS_WRITE);
@@ -1302,7 +1272,6 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
1302 1272
1303 ocfs2_journal_dirty(handle, bh); 1273 ocfs2_journal_dirty(handle, bh);
1304leave: 1274leave:
1305 mlog_exit(status);
1306 return status; 1275 return status;
1307} 1276}
1308 1277
@@ -1345,8 +1314,7 @@ int ocfs2_validate_inode_block(struct super_block *sb,
1345 int rc; 1314 int rc;
1346 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; 1315 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
1347 1316
1348 mlog(0, "Validating dinode %llu\n", 1317 trace_ocfs2_validate_inode_block((unsigned long long)bh->b_blocknr);
1349 (unsigned long long)bh->b_blocknr);
1350 1318
1351 BUG_ON(!buffer_uptodate(bh)); 1319 BUG_ON(!buffer_uptodate(bh));
1352 1320
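The trace_ocfs2_* calls introduced throughout this file are declared in fs/ocfs2/ocfs2_trace.h, which is outside this diff. A minimal sketch of the machinery being converted to, with field names that are illustrative rather than copied from ocfs2_trace.h: a point such as trace_ocfs2_query_inode_wipe_end() would be defined with the stock TRACE_EVENT macros.

    #undef TRACE_SYSTEM
    #define TRACE_SYSTEM ocfs2

    #if !defined(_TRACE_OCFS2_H) || defined(TRACE_HEADER_MULTI_READ)
    #define _TRACE_OCFS2_H

    #include <linux/tracepoint.h>

    /* Records the exit status and the numeric skip reason computed above. */
    TRACE_EVENT(ocfs2_query_inode_wipe_end,
            TP_PROTO(int status, int reason),
            TP_ARGS(status, reason),
            TP_STRUCT__entry(
                    __field(int, status)
                    __field(int, reason)
            ),
            TP_fast_assign(
                    __entry->status = status;
                    __entry->reason = reason;
            ),
            TP_printk("status %d reason %d", __entry->status, __entry->reason)
    );

    #endif /* _TRACE_OCFS2_H */

    /* This include must stay outside the guard. */
    #include <trace/define_trace.h>

Unlike the removed mlog(0, ...) calls, which were compiled-in printk-style logging gated by the per-file mask, tracepoints compile to disabled-by-default call sites that can be enabled one event at a time.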
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 7a4868196152..8f13c5989eae 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -9,7 +9,6 @@
9#include <linux/mount.h> 9#include <linux/mount.h>
10#include <linux/compat.h> 10#include <linux/compat.h>
11 11
12#define MLOG_MASK_PREFIX ML_INODE
13#include <cluster/masklog.h> 12#include <cluster/masklog.h>
14 13
15#include "ocfs2.h" 14#include "ocfs2.h"
@@ -46,6 +45,22 @@ static inline void __o2info_set_request_error(struct ocfs2_info_request *kreq,
46#define o2info_set_request_error(a, b) \ 45#define o2info_set_request_error(a, b) \
47 __o2info_set_request_error((struct ocfs2_info_request *)&(a), b) 46 __o2info_set_request_error((struct ocfs2_info_request *)&(a), b)
48 47
48static inline void __o2info_set_request_filled(struct ocfs2_info_request *req)
49{
50 req->ir_flags |= OCFS2_INFO_FL_FILLED;
51}
52
53#define o2info_set_request_filled(a) \
54 __o2info_set_request_filled((struct ocfs2_info_request *)&(a))
55
56static inline void __o2info_clear_request_filled(struct ocfs2_info_request *req)
57{
58 req->ir_flags &= ~OCFS2_INFO_FL_FILLED;
59}
60
61#define o2info_clear_request_filled(a) \
62 __o2info_clear_request_filled((struct ocfs2_info_request *)&(a))
63
49static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) 64static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags)
50{ 65{
51 int status; 66 int status;
@@ -59,7 +74,6 @@ static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags)
59 *flags = OCFS2_I(inode)->ip_attr; 74 *flags = OCFS2_I(inode)->ip_attr;
60 ocfs2_inode_unlock(inode, 0); 75 ocfs2_inode_unlock(inode, 0);
61 76
62 mlog_exit(status);
63 return status; 77 return status;
64} 78}
65 79
@@ -82,7 +96,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
82 } 96 }
83 97
84 status = -EACCES; 98 status = -EACCES;
85 if (!is_owner_or_cap(inode)) 99 if (!inode_owner_or_capable(inode))
86 goto bail_unlock; 100 goto bail_unlock;
87 101
88 if (!S_ISDIR(inode->i_mode)) 102 if (!S_ISDIR(inode->i_mode))
@@ -125,7 +139,6 @@ bail:
125 139
126 brelse(bh); 140 brelse(bh);
127 141
128 mlog_exit(status);
129 return status; 142 return status;
130} 143}
131 144
@@ -139,7 +152,8 @@ int ocfs2_info_handle_blocksize(struct inode *inode,
139 goto bail; 152 goto bail;
140 153
141 oib.ib_blocksize = inode->i_sb->s_blocksize; 154 oib.ib_blocksize = inode->i_sb->s_blocksize;
142 oib.ib_req.ir_flags |= OCFS2_INFO_FL_FILLED; 155
156 o2info_set_request_filled(oib);
143 157
144 if (o2info_to_user(oib, req)) 158 if (o2info_to_user(oib, req))
145 goto bail; 159 goto bail;
@@ -163,7 +177,8 @@ int ocfs2_info_handle_clustersize(struct inode *inode,
163 goto bail; 177 goto bail;
164 178
165 oic.ic_clustersize = osb->s_clustersize; 179 oic.ic_clustersize = osb->s_clustersize;
166 oic.ic_req.ir_flags |= OCFS2_INFO_FL_FILLED; 180
181 o2info_set_request_filled(oic);
167 182
168 if (o2info_to_user(oic, req)) 183 if (o2info_to_user(oic, req))
169 goto bail; 184 goto bail;
@@ -187,7 +202,8 @@ int ocfs2_info_handle_maxslots(struct inode *inode,
187 goto bail; 202 goto bail;
188 203
189 oim.im_max_slots = osb->max_slots; 204 oim.im_max_slots = osb->max_slots;
190 oim.im_req.ir_flags |= OCFS2_INFO_FL_FILLED; 205
206 o2info_set_request_filled(oim);
191 207
192 if (o2info_to_user(oim, req)) 208 if (o2info_to_user(oim, req))
193 goto bail; 209 goto bail;
@@ -211,7 +227,8 @@ int ocfs2_info_handle_label(struct inode *inode,
211 goto bail; 227 goto bail;
212 228
213 memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN); 229 memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN);
214 oil.il_req.ir_flags |= OCFS2_INFO_FL_FILLED; 230
231 o2info_set_request_filled(oil);
215 232
216 if (o2info_to_user(oil, req)) 233 if (o2info_to_user(oil, req))
217 goto bail; 234 goto bail;
@@ -235,7 +252,8 @@ int ocfs2_info_handle_uuid(struct inode *inode,
235 goto bail; 252 goto bail;
236 253
237 memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1); 254 memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1);
238 oiu.iu_req.ir_flags |= OCFS2_INFO_FL_FILLED; 255
256 o2info_set_request_filled(oiu);
239 257
240 if (o2info_to_user(oiu, req)) 258 if (o2info_to_user(oiu, req))
241 goto bail; 259 goto bail;
@@ -261,7 +279,8 @@ int ocfs2_info_handle_fs_features(struct inode *inode,
261 oif.if_compat_features = osb->s_feature_compat; 279 oif.if_compat_features = osb->s_feature_compat;
262 oif.if_incompat_features = osb->s_feature_incompat; 280 oif.if_incompat_features = osb->s_feature_incompat;
263 oif.if_ro_compat_features = osb->s_feature_ro_compat; 281 oif.if_ro_compat_features = osb->s_feature_ro_compat;
264 oif.if_req.ir_flags |= OCFS2_INFO_FL_FILLED; 282
283 o2info_set_request_filled(oif);
265 284
266 if (o2info_to_user(oif, req)) 285 if (o2info_to_user(oif, req))
267 goto bail; 286 goto bail;
@@ -286,7 +305,7 @@ int ocfs2_info_handle_journal_size(struct inode *inode,
286 305
287 oij.ij_journal_size = osb->journal->j_inode->i_size; 306 oij.ij_journal_size = osb->journal->j_inode->i_size;
288 307
289 oij.ij_req.ir_flags |= OCFS2_INFO_FL_FILLED; 308 o2info_set_request_filled(oij);
290 309
291 if (o2info_to_user(oij, req)) 310 if (o2info_to_user(oij, req))
292 goto bail; 311 goto bail;
@@ -308,7 +327,7 @@ int ocfs2_info_handle_unknown(struct inode *inode,
308 if (o2info_from_user(oir, req)) 327 if (o2info_from_user(oir, req))
309 goto bail; 328 goto bail;
310 329
311 oir.ir_flags &= ~OCFS2_INFO_FL_FILLED; 330 o2info_clear_request_filled(oir);
312 331
313 if (o2info_to_user(oir, req)) 332 if (o2info_to_user(oir, req))
314 goto bail; 333 goto bail;
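The new __o2info_set_request_filled()/__o2info_clear_request_filled() helpers (like the existing error helper) rely on every ocfs2 info request type embedding the generic struct ocfs2_info_request header as its first member, which is what makes the cast in the wrapper macros well-defined. A sketch of that layout, assuming the definitions in ocfs2_ioctl.h; it is shown here only for illustration:

    struct ocfs2_info_request {
            __u32 ir_magic;         /* magic number */
            __u32 ir_code;          /* which info item is requested */
            __u32 ir_size;          /* size of this request */
            __u32 ir_flags;         /* OCFS2_INFO_FL_* flags */
    } __attribute__ ((packed));

    struct ocfs2_info_blocksize {
            struct ocfs2_info_request ib_req;   /* must stay first */
            __u32 ib_blocksize;
    } __attribute__ ((packed));

With that invariant, o2info_set_request_filled(oib) expands to setting OCFS2_INFO_FL_FILLED in oib.ib_req.ir_flags, replacing the open-coded flag updates removed in the hunks above.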
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index faa2303dbf0a..b141a44605ca 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -31,7 +31,6 @@
31#include <linux/time.h> 31#include <linux/time.h>
32#include <linux/random.h> 32#include <linux/random.h>
33 33
34#define MLOG_MASK_PREFIX ML_JOURNAL
35#include <cluster/masklog.h> 34#include <cluster/masklog.h>
36 35
37#include "ocfs2.h" 36#include "ocfs2.h"
@@ -52,6 +51,7 @@
52#include "quota.h" 51#include "quota.h"
53 52
54#include "buffer_head_io.h" 53#include "buffer_head_io.h"
54#include "ocfs2_trace.h"
55 55
56DEFINE_SPINLOCK(trans_inc_lock); 56DEFINE_SPINLOCK(trans_inc_lock);
57 57
@@ -303,16 +303,15 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb)
303 unsigned int flushed; 303 unsigned int flushed;
304 struct ocfs2_journal *journal = NULL; 304 struct ocfs2_journal *journal = NULL;
305 305
306 mlog_entry_void();
307
308 journal = osb->journal; 306 journal = osb->journal;
309 307
310 /* Flush all pending commits and checkpoint the journal. */ 308 /* Flush all pending commits and checkpoint the journal. */
311 down_write(&journal->j_trans_barrier); 309 down_write(&journal->j_trans_barrier);
312 310
313 if (atomic_read(&journal->j_num_trans) == 0) { 311 flushed = atomic_read(&journal->j_num_trans);
312 trace_ocfs2_commit_cache_begin(flushed);
313 if (flushed == 0) {
314 up_write(&journal->j_trans_barrier); 314 up_write(&journal->j_trans_barrier);
315 mlog(0, "No transactions for me to flush!\n");
316 goto finally; 315 goto finally;
317 } 316 }
318 317
@@ -331,13 +330,11 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb)
331 atomic_set(&journal->j_num_trans, 0); 330 atomic_set(&journal->j_num_trans, 0);
332 up_write(&journal->j_trans_barrier); 331 up_write(&journal->j_trans_barrier);
333 332
334 mlog(0, "commit_thread: flushed transaction %lu (%u handles)\n", 333 trace_ocfs2_commit_cache_end(journal->j_trans_id, flushed);
335 journal->j_trans_id, flushed);
336 334
337 ocfs2_wake_downconvert_thread(osb); 335 ocfs2_wake_downconvert_thread(osb);
338 wake_up(&journal->j_checkpointed); 336 wake_up(&journal->j_checkpointed);
339finally: 337finally:
340 mlog_exit(status);
341 return status; 338 return status;
342} 339}
343 340
@@ -425,9 +422,8 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks)
425 return 0; 422 return 0;
426 423
427 old_nblocks = handle->h_buffer_credits; 424 old_nblocks = handle->h_buffer_credits;
428 mlog_entry_void();
429 425
430 mlog(0, "Trying to extend transaction by %d blocks\n", nblocks); 426 trace_ocfs2_extend_trans(old_nblocks, nblocks);
431 427
432#ifdef CONFIG_OCFS2_DEBUG_FS 428#ifdef CONFIG_OCFS2_DEBUG_FS
433 status = 1; 429 status = 1;
@@ -440,9 +436,7 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks)
440#endif 436#endif
441 437
442 if (status > 0) { 438 if (status > 0) {
443 mlog(0, 439 trace_ocfs2_extend_trans_restart(old_nblocks + nblocks);
444 "jbd2_journal_extend failed, trying "
445 "jbd2_journal_restart\n");
446 status = jbd2_journal_restart(handle, 440 status = jbd2_journal_restart(handle,
447 old_nblocks + nblocks); 441 old_nblocks + nblocks);
448 if (status < 0) { 442 if (status < 0) {
@@ -453,8 +447,6 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks)
453 447
454 status = 0; 448 status = 0;
455bail: 449bail:
456
457 mlog_exit(status);
458 return status; 450 return status;
459} 451}
460 452
@@ -622,12 +614,9 @@ static int __ocfs2_journal_access(handle_t *handle,
622 BUG_ON(!handle); 614 BUG_ON(!handle);
623 BUG_ON(!bh); 615 BUG_ON(!bh);
624 616
625 mlog_entry("bh->b_blocknr=%llu, type=%d (\"%s\"), bh->b_size = %zu\n", 617 trace_ocfs2_journal_access(
626 (unsigned long long)bh->b_blocknr, type, 618 (unsigned long long)ocfs2_metadata_cache_owner(ci),
627 (type == OCFS2_JOURNAL_ACCESS_CREATE) ? 619 (unsigned long long)bh->b_blocknr, type, bh->b_size);
628 "OCFS2_JOURNAL_ACCESS_CREATE" :
629 "OCFS2_JOURNAL_ACCESS_WRITE",
630 bh->b_size);
631 620
632 /* we can safely remove this assertion after testing. */ 621 /* we can safely remove this assertion after testing. */
633 if (!buffer_uptodate(bh)) { 622 if (!buffer_uptodate(bh)) {
@@ -668,7 +657,6 @@ static int __ocfs2_journal_access(handle_t *handle,
668 mlog(ML_ERROR, "Error %d getting %d access to buffer!\n", 657 mlog(ML_ERROR, "Error %d getting %d access to buffer!\n",
669 status, type); 658 status, type);
670 659
671 mlog_exit(status);
672 return status; 660 return status;
673} 661}
674 662
@@ -737,13 +725,10 @@ void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh)
737{ 725{
738 int status; 726 int status;
739 727
740 mlog_entry("(bh->b_blocknr=%llu)\n", 728 trace_ocfs2_journal_dirty((unsigned long long)bh->b_blocknr);
741 (unsigned long long)bh->b_blocknr);
742 729
743 status = jbd2_journal_dirty_metadata(handle, bh); 730 status = jbd2_journal_dirty_metadata(handle, bh);
744 BUG_ON(status); 731 BUG_ON(status);
745
746 mlog_exit_void();
747} 732}
748 733
749#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE) 734#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE)
@@ -775,8 +760,6 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
775 struct ocfs2_super *osb; 760 struct ocfs2_super *osb;
776 int inode_lock = 0; 761 int inode_lock = 0;
777 762
778 mlog_entry_void();
779
780 BUG_ON(!journal); 763 BUG_ON(!journal);
781 764
782 osb = journal->j_osb; 765 osb = journal->j_osb;
@@ -820,10 +803,9 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
820 goto done; 803 goto done;
821 } 804 }
822 805
823 mlog(0, "inode->i_size = %lld\n", inode->i_size); 806 trace_ocfs2_journal_init(inode->i_size,
824 mlog(0, "inode->i_blocks = %llu\n", 807 (unsigned long long)inode->i_blocks,
825 (unsigned long long)inode->i_blocks); 808 OCFS2_I(inode)->ip_clusters);
826 mlog(0, "inode->ip_clusters = %u\n", OCFS2_I(inode)->ip_clusters);
827 809
828 /* call the kernel's journal init function now */ 810 /* call the kernel's journal init function now */
829 j_journal = jbd2_journal_init_inode(inode); 811 j_journal = jbd2_journal_init_inode(inode);
@@ -833,8 +815,7 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
833 goto done; 815 goto done;
834 } 816 }
835 817
836 mlog(0, "Returned from jbd2_journal_init_inode\n"); 818 trace_ocfs2_journal_init_maxlen(j_journal->j_maxlen);
837 mlog(0, "j_journal->j_maxlen = %u\n", j_journal->j_maxlen);
838 819
839 *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) & 820 *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) &
840 OCFS2_JOURNAL_DIRTY_FL); 821 OCFS2_JOURNAL_DIRTY_FL);
@@ -859,7 +840,6 @@ done:
859 } 840 }
860 } 841 }
861 842
862 mlog_exit(status);
863 return status; 843 return status;
864} 844}
865 845
@@ -882,8 +862,6 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
882 struct buffer_head *bh = journal->j_bh; 862 struct buffer_head *bh = journal->j_bh;
883 struct ocfs2_dinode *fe; 863 struct ocfs2_dinode *fe;
884 864
885 mlog_entry_void();
886
887 fe = (struct ocfs2_dinode *)bh->b_data; 865 fe = (struct ocfs2_dinode *)bh->b_data;
888 866
889 /* The journal bh on the osb always comes from ocfs2_journal_init() 867 /* The journal bh on the osb always comes from ocfs2_journal_init()
@@ -906,7 +884,6 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
906 if (status < 0) 884 if (status < 0)
907 mlog_errno(status); 885 mlog_errno(status);
908 886
909 mlog_exit(status);
910 return status; 887 return status;
911} 888}
912 889
@@ -921,8 +898,6 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
921 struct inode *inode = NULL; 898 struct inode *inode = NULL;
922 int num_running_trans = 0; 899 int num_running_trans = 0;
923 900
924 mlog_entry_void();
925
926 BUG_ON(!osb); 901 BUG_ON(!osb);
927 902
928 journal = osb->journal; 903 journal = osb->journal;
@@ -939,10 +914,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
939 BUG(); 914 BUG();
940 915
941 num_running_trans = atomic_read(&(osb->journal->j_num_trans)); 916 num_running_trans = atomic_read(&(osb->journal->j_num_trans));
942 if (num_running_trans > 0) 917 trace_ocfs2_journal_shutdown(num_running_trans);
943 mlog(0, "Shutting down journal: must wait on %d "
944 "running transactions!\n",
945 num_running_trans);
946 918
947 /* Do a commit_cache here. It will flush our journal, *and* 919 /* Do a commit_cache here. It will flush our journal, *and*
948 * release any locks that are still held. 920 * release any locks that are still held.
@@ -955,7 +927,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
955 * completely destroy the journal. */ 927 * completely destroy the journal. */
956 if (osb->commit_task) { 928 if (osb->commit_task) {
957 /* Wait for the commit thread */ 929 /* Wait for the commit thread */
958 mlog(0, "Waiting for ocfs2commit to exit....\n"); 930 trace_ocfs2_journal_shutdown_wait(osb->commit_task);
959 kthread_stop(osb->commit_task); 931 kthread_stop(osb->commit_task);
960 osb->commit_task = NULL; 932 osb->commit_task = NULL;
961 } 933 }
@@ -998,7 +970,6 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
998done: 970done:
999 if (inode) 971 if (inode)
1000 iput(inode); 972 iput(inode);
1001 mlog_exit_void();
1002} 973}
1003 974
1004static void ocfs2_clear_journal_error(struct super_block *sb, 975static void ocfs2_clear_journal_error(struct super_block *sb,
@@ -1024,8 +995,6 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
1024 int status = 0; 995 int status = 0;
1025 struct ocfs2_super *osb; 996 struct ocfs2_super *osb;
1026 997
1027 mlog_entry_void();
1028
1029 BUG_ON(!journal); 998 BUG_ON(!journal);
1030 999
1031 osb = journal->j_osb; 1000 osb = journal->j_osb;
@@ -1059,7 +1028,6 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
1059 osb->commit_task = NULL; 1028 osb->commit_task = NULL;
1060 1029
1061done: 1030done:
1062 mlog_exit(status);
1063 return status; 1031 return status;
1064} 1032}
1065 1033
@@ -1070,8 +1038,6 @@ int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
1070{ 1038{
1071 int status; 1039 int status;
1072 1040
1073 mlog_entry_void();
1074
1075 BUG_ON(!journal); 1041 BUG_ON(!journal);
1076 1042
1077 status = jbd2_journal_wipe(journal->j_journal, full); 1043 status = jbd2_journal_wipe(journal->j_journal, full);
@@ -1085,7 +1051,6 @@ int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
1085 mlog_errno(status); 1051 mlog_errno(status);
1086 1052
1087bail: 1053bail:
1088 mlog_exit(status);
1089 return status; 1054 return status;
1090} 1055}
1091 1056
@@ -1124,8 +1089,6 @@ static int ocfs2_force_read_journal(struct inode *inode)
1124#define CONCURRENT_JOURNAL_FILL 32ULL 1089#define CONCURRENT_JOURNAL_FILL 32ULL
1125 struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL]; 1090 struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL];
1126 1091
1127 mlog_entry_void();
1128
1129 memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL); 1092 memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL);
1130 1093
1131 num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, inode->i_size); 1094 num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, inode->i_size);
@@ -1161,7 +1124,6 @@ static int ocfs2_force_read_journal(struct inode *inode)
1161bail: 1124bail:
1162 for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++) 1125 for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++)
1163 brelse(bhs[i]); 1126 brelse(bhs[i]);
1164 mlog_exit(status);
1165 return status; 1127 return status;
1166} 1128}
1167 1129
@@ -1185,7 +1147,7 @@ struct ocfs2_la_recovery_item {
1185 */ 1147 */
1186void ocfs2_complete_recovery(struct work_struct *work) 1148void ocfs2_complete_recovery(struct work_struct *work)
1187{ 1149{
1188 int ret; 1150 int ret = 0;
1189 struct ocfs2_journal *journal = 1151 struct ocfs2_journal *journal =
1190 container_of(work, struct ocfs2_journal, j_recovery_work); 1152 container_of(work, struct ocfs2_journal, j_recovery_work);
1191 struct ocfs2_super *osb = journal->j_osb; 1153 struct ocfs2_super *osb = journal->j_osb;
@@ -1194,9 +1156,8 @@ void ocfs2_complete_recovery(struct work_struct *work)
1194 struct ocfs2_quota_recovery *qrec; 1156 struct ocfs2_quota_recovery *qrec;
1195 LIST_HEAD(tmp_la_list); 1157 LIST_HEAD(tmp_la_list);
1196 1158
1197 mlog_entry_void(); 1159 trace_ocfs2_complete_recovery(
1198 1160 (unsigned long long)OCFS2_I(journal->j_inode)->ip_blkno);
1199 mlog(0, "completing recovery from keventd\n");
1200 1161
1201 spin_lock(&journal->j_lock); 1162 spin_lock(&journal->j_lock);
1202 list_splice_init(&journal->j_la_cleanups, &tmp_la_list); 1163 list_splice_init(&journal->j_la_cleanups, &tmp_la_list);
@@ -1205,15 +1166,18 @@ void ocfs2_complete_recovery(struct work_struct *work)
1205 list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) { 1166 list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) {
1206 list_del_init(&item->lri_list); 1167 list_del_init(&item->lri_list);
1207 1168
1208 mlog(0, "Complete recovery for slot %d\n", item->lri_slot);
1209
1210 ocfs2_wait_on_quotas(osb); 1169 ocfs2_wait_on_quotas(osb);
1211 1170
1212 la_dinode = item->lri_la_dinode; 1171 la_dinode = item->lri_la_dinode;
1213 if (la_dinode) { 1172 tl_dinode = item->lri_tl_dinode;
1214 mlog(0, "Clean up local alloc %llu\n", 1173 qrec = item->lri_qrec;
1215 (unsigned long long)le64_to_cpu(la_dinode->i_blkno)); 1174
1175 trace_ocfs2_complete_recovery_slot(item->lri_slot,
1176 la_dinode ? le64_to_cpu(la_dinode->i_blkno) : 0,
1177 tl_dinode ? le64_to_cpu(tl_dinode->i_blkno) : 0,
1178 qrec);
1216 1179
1180 if (la_dinode) {
1217 ret = ocfs2_complete_local_alloc_recovery(osb, 1181 ret = ocfs2_complete_local_alloc_recovery(osb,
1218 la_dinode); 1182 la_dinode);
1219 if (ret < 0) 1183 if (ret < 0)
@@ -1222,11 +1186,7 @@ void ocfs2_complete_recovery(struct work_struct *work)
1222 kfree(la_dinode); 1186 kfree(la_dinode);
1223 } 1187 }
1224 1188
1225 tl_dinode = item->lri_tl_dinode;
1226 if (tl_dinode) { 1189 if (tl_dinode) {
1227 mlog(0, "Clean up truncate log %llu\n",
1228 (unsigned long long)le64_to_cpu(tl_dinode->i_blkno));
1229
1230 ret = ocfs2_complete_truncate_log_recovery(osb, 1190 ret = ocfs2_complete_truncate_log_recovery(osb,
1231 tl_dinode); 1191 tl_dinode);
1232 if (ret < 0) 1192 if (ret < 0)
@@ -1239,9 +1199,7 @@ void ocfs2_complete_recovery(struct work_struct *work)
1239 if (ret < 0) 1199 if (ret < 0)
1240 mlog_errno(ret); 1200 mlog_errno(ret);
1241 1201
1242 qrec = item->lri_qrec;
1243 if (qrec) { 1202 if (qrec) {
1244 mlog(0, "Recovering quota files");
1245 ret = ocfs2_finish_quota_recovery(osb, qrec, 1203 ret = ocfs2_finish_quota_recovery(osb, qrec,
1246 item->lri_slot); 1204 item->lri_slot);
1247 if (ret < 0) 1205 if (ret < 0)
@@ -1252,8 +1210,7 @@ void ocfs2_complete_recovery(struct work_struct *work)
1252 kfree(item); 1210 kfree(item);
1253 } 1211 }
1254 1212
1255 mlog(0, "Recovery completion\n"); 1213 trace_ocfs2_complete_recovery_end(ret);
1256 mlog_exit_void();
1257} 1214}
1258 1215
1259/* NOTE: This function always eats your references to la_dinode and 1216/* NOTE: This function always eats your references to la_dinode and
@@ -1339,8 +1296,6 @@ static int __ocfs2_recovery_thread(void *arg)
1339 int rm_quota_used = 0, i; 1296 int rm_quota_used = 0, i;
1340 struct ocfs2_quota_recovery *qrec; 1297 struct ocfs2_quota_recovery *qrec;
1341 1298
1342 mlog_entry_void();
1343
1344 status = ocfs2_wait_on_mount(osb); 1299 status = ocfs2_wait_on_mount(osb);
1345 if (status < 0) { 1300 if (status < 0) {
1346 goto bail; 1301 goto bail;
@@ -1372,15 +1327,12 @@ restart:
1372 * clear it until ocfs2_recover_node() has succeeded. */ 1327 * clear it until ocfs2_recover_node() has succeeded. */
1373 node_num = rm->rm_entries[0]; 1328 node_num = rm->rm_entries[0];
1374 spin_unlock(&osb->osb_lock); 1329 spin_unlock(&osb->osb_lock);
1375 mlog(0, "checking node %d\n", node_num);
1376 slot_num = ocfs2_node_num_to_slot(osb, node_num); 1330 slot_num = ocfs2_node_num_to_slot(osb, node_num);
1331 trace_ocfs2_recovery_thread_node(node_num, slot_num);
1377 if (slot_num == -ENOENT) { 1332 if (slot_num == -ENOENT) {
1378 status = 0; 1333 status = 0;
1379 mlog(0, "no slot for this node, so no recovery"
1380 "required.\n");
1381 goto skip_recovery; 1334 goto skip_recovery;
1382 } 1335 }
1383 mlog(0, "node %d was using slot %d\n", node_num, slot_num);
1384 1336
1385 /* It is a bit subtle with quota recovery. We cannot do it 1337 /* It is a bit subtle with quota recovery. We cannot do it
1386 * immediately because we have to obtain cluster locks from 1338 * immediately because we have to obtain cluster locks from
@@ -1407,7 +1359,7 @@ skip_recovery:
1407 spin_lock(&osb->osb_lock); 1359 spin_lock(&osb->osb_lock);
1408 } 1360 }
1409 spin_unlock(&osb->osb_lock); 1361 spin_unlock(&osb->osb_lock);
1410 mlog(0, "All nodes recovered\n"); 1362 trace_ocfs2_recovery_thread_end(status);
1411 1363
1412 /* Refresh all journal recovery generations from disk */ 1364 /* Refresh all journal recovery generations from disk */
1413 status = ocfs2_check_journals_nolocks(osb); 1365 status = ocfs2_check_journals_nolocks(osb);
@@ -1416,7 +1368,7 @@ skip_recovery:
1416 mlog_errno(status); 1368 mlog_errno(status);
1417 1369
1418 /* Now it is right time to recover quotas... We have to do this under 1370 /* Now it is right time to recover quotas... We have to do this under
1419 * superblock lock so that noone can start using the slot (and crash) 1371 * superblock lock so that no one can start using the slot (and crash)
1420 * before we recover it */ 1372 * before we recover it */
1421 for (i = 0; i < rm_quota_used; i++) { 1373 for (i = 0; i < rm_quota_used; i++) {
1422 qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]); 1374 qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
@@ -1451,7 +1403,6 @@ bail:
1451 if (rm_quota) 1403 if (rm_quota)
1452 kfree(rm_quota); 1404 kfree(rm_quota);
1453 1405
1454 mlog_exit(status);
1455 /* no one is calling kthread_stop() for us so the kthread() api 1406 /* no one is calling kthread_stop() for us so the kthread() api
1456 * requires that we call do_exit(). And it isn't exported, but 1407 * requires that we call do_exit(). And it isn't exported, but
1457 * complete_and_exit() seems to be a minimal wrapper around it. */ 1408 * complete_and_exit() seems to be a minimal wrapper around it. */
@@ -1461,19 +1412,15 @@ bail:
1461 1412
1462void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) 1413void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
1463{ 1414{
1464 mlog_entry("(node_num=%d, osb->node_num = %d)\n",
1465 node_num, osb->node_num);
1466
1467 mutex_lock(&osb->recovery_lock); 1415 mutex_lock(&osb->recovery_lock);
1468 if (osb->disable_recovery)
1469 goto out;
1470 1416
1471 /* People waiting on recovery will wait on 1417 trace_ocfs2_recovery_thread(node_num, osb->node_num,
1472 * the recovery map to empty. */ 1418 osb->disable_recovery, osb->recovery_thread_task,
1473 if (ocfs2_recovery_map_set(osb, node_num)) 1419 osb->disable_recovery ?
1474 mlog(0, "node %d already in recovery map.\n", node_num); 1420 -1 : ocfs2_recovery_map_set(osb, node_num));
1475 1421
1476 mlog(0, "starting recovery thread...\n"); 1422 if (osb->disable_recovery)
1423 goto out;
1477 1424
1478 if (osb->recovery_thread_task) 1425 if (osb->recovery_thread_task)
1479 goto out; 1426 goto out;
@@ -1488,8 +1435,6 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
1488out: 1435out:
1489 mutex_unlock(&osb->recovery_lock); 1436 mutex_unlock(&osb->recovery_lock);
1490 wake_up(&osb->recovery_event); 1437 wake_up(&osb->recovery_event);
1491
1492 mlog_exit_void();
1493} 1438}
1494 1439
1495static int ocfs2_read_journal_inode(struct ocfs2_super *osb, 1440static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
@@ -1563,7 +1508,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1563 * If not, it needs recovery. 1508 * If not, it needs recovery.
1564 */ 1509 */
1565 if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) { 1510 if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) {
1566 mlog(0, "Slot %u already recovered (old/new=%u/%u)\n", slot_num, 1511 trace_ocfs2_replay_journal_recovered(slot_num,
1567 osb->slot_recovery_generations[slot_num], slot_reco_gen); 1512 osb->slot_recovery_generations[slot_num], slot_reco_gen);
1568 osb->slot_recovery_generations[slot_num] = slot_reco_gen; 1513 osb->slot_recovery_generations[slot_num] = slot_reco_gen;
1569 status = -EBUSY; 1514 status = -EBUSY;
@@ -1574,7 +1519,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1574 1519
1575 status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); 1520 status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
1576 if (status < 0) { 1521 if (status < 0) {
1577 mlog(0, "status returned from ocfs2_inode_lock=%d\n", status); 1522 trace_ocfs2_replay_journal_lock_err(status);
1578 if (status != -ERESTARTSYS) 1523 if (status != -ERESTARTSYS)
1579 mlog(ML_ERROR, "Could not lock journal!\n"); 1524 mlog(ML_ERROR, "Could not lock journal!\n");
1580 goto done; 1525 goto done;
@@ -1587,7 +1532,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1587 slot_reco_gen = ocfs2_get_recovery_generation(fe); 1532 slot_reco_gen = ocfs2_get_recovery_generation(fe);
1588 1533
1589 if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) { 1534 if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) {
1590 mlog(0, "No recovery required for node %d\n", node_num); 1535 trace_ocfs2_replay_journal_skip(node_num);
1591 /* Refresh recovery generation for the slot */ 1536 /* Refresh recovery generation for the slot */
1592 osb->slot_recovery_generations[slot_num] = slot_reco_gen; 1537 osb->slot_recovery_generations[slot_num] = slot_reco_gen;
1593 goto done; 1538 goto done;
@@ -1608,7 +1553,6 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1608 goto done; 1553 goto done;
1609 } 1554 }
1610 1555
1611 mlog(0, "calling journal_init_inode\n");
1612 journal = jbd2_journal_init_inode(inode); 1556 journal = jbd2_journal_init_inode(inode);
1613 if (journal == NULL) { 1557 if (journal == NULL) {
1614 mlog(ML_ERROR, "Linux journal layer error\n"); 1558 mlog(ML_ERROR, "Linux journal layer error\n");
@@ -1628,7 +1572,6 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1628 ocfs2_clear_journal_error(osb->sb, journal, slot_num); 1572 ocfs2_clear_journal_error(osb->sb, journal, slot_num);
1629 1573
1630 /* wipe the journal */ 1574 /* wipe the journal */
1631 mlog(0, "flushing the journal.\n");
1632 jbd2_journal_lock_updates(journal); 1575 jbd2_journal_lock_updates(journal);
1633 status = jbd2_journal_flush(journal); 1576 status = jbd2_journal_flush(journal);
1634 jbd2_journal_unlock_updates(journal); 1577 jbd2_journal_unlock_updates(journal);
@@ -1665,7 +1608,6 @@ done:
1665 1608
1666 brelse(bh); 1609 brelse(bh);
1667 1610
1668 mlog_exit(status);
1669 return status; 1611 return status;
1670} 1612}
1671 1613
@@ -1688,8 +1630,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
1688 struct ocfs2_dinode *la_copy = NULL; 1630 struct ocfs2_dinode *la_copy = NULL;
1689 struct ocfs2_dinode *tl_copy = NULL; 1631 struct ocfs2_dinode *tl_copy = NULL;
1690 1632
1691 mlog_entry("(node_num=%d, slot_num=%d, osb->node_num = %d)\n", 1633 trace_ocfs2_recover_node(node_num, slot_num, osb->node_num);
1692 node_num, slot_num, osb->node_num);
1693 1634
1694 /* Should not ever be called to recover ourselves -- in that 1635 /* Should not ever be called to recover ourselves -- in that
1695 * case we should've called ocfs2_journal_load instead. */ 1636 * case we should've called ocfs2_journal_load instead. */
@@ -1698,9 +1639,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
1698 status = ocfs2_replay_journal(osb, node_num, slot_num); 1639 status = ocfs2_replay_journal(osb, node_num, slot_num);
1699 if (status < 0) { 1640 if (status < 0) {
1700 if (status == -EBUSY) { 1641 if (status == -EBUSY) {
1701 mlog(0, "Skipping recovery for slot %u (node %u) " 1642 trace_ocfs2_recover_node_skip(slot_num, node_num);
1702 "as another node has recovered it\n", slot_num,
1703 node_num);
1704 status = 0; 1643 status = 0;
1705 goto done; 1644 goto done;
1706 } 1645 }
@@ -1735,7 +1674,6 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
1735 status = 0; 1674 status = 0;
1736done: 1675done:
1737 1676
1738 mlog_exit(status);
1739 return status; 1677 return status;
1740} 1678}
1741 1679
@@ -1808,8 +1746,8 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
1808 spin_lock(&osb->osb_lock); 1746 spin_lock(&osb->osb_lock);
1809 osb->slot_recovery_generations[i] = gen; 1747 osb->slot_recovery_generations[i] = gen;
1810 1748
1811 mlog(0, "Slot %u recovery generation is %u\n", i, 1749 trace_ocfs2_mark_dead_nodes(i,
1812 osb->slot_recovery_generations[i]); 1750 osb->slot_recovery_generations[i]);
1813 1751
1814 if (i == osb->slot_num) { 1752 if (i == osb->slot_num) {
1815 spin_unlock(&osb->osb_lock); 1753 spin_unlock(&osb->osb_lock);
@@ -1845,7 +1783,6 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
1845 1783
1846 status = 0; 1784 status = 0;
1847bail: 1785bail:
1848 mlog_exit(status);
1849 return status; 1786 return status;
1850} 1787}
1851 1788
@@ -1884,11 +1821,12 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
1884 1821
1885 os = &osb->osb_orphan_scan; 1822 os = &osb->osb_orphan_scan;
1886 1823
1887 mlog(0, "Begin orphan scan\n");
1888
1889 if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) 1824 if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
1890 goto out; 1825 goto out;
1891 1826
1827 trace_ocfs2_queue_orphan_scan_begin(os->os_count, os->os_seqno,
1828 atomic_read(&os->os_state));
1829
1892 status = ocfs2_orphan_scan_lock(osb, &seqno); 1830 status = ocfs2_orphan_scan_lock(osb, &seqno);
1893 if (status < 0) { 1831 if (status < 0) {
1894 if (status != -EAGAIN) 1832 if (status != -EAGAIN)
@@ -1918,7 +1856,8 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
1918unlock: 1856unlock:
1919 ocfs2_orphan_scan_unlock(osb, seqno); 1857 ocfs2_orphan_scan_unlock(osb, seqno);
1920out: 1858out:
1921 mlog(0, "Orphan scan completed\n"); 1859 trace_ocfs2_queue_orphan_scan_end(os->os_count, os->os_seqno,
1860 atomic_read(&os->os_state));
1922 return; 1861 return;
1923} 1862}
1924 1863
@@ -2002,8 +1941,7 @@ static int ocfs2_orphan_filldir(void *priv, const char *name, int name_len,
2002 if (IS_ERR(iter)) 1941 if (IS_ERR(iter))
2003 return 0; 1942 return 0;
2004 1943
2005 mlog(0, "queue orphan %llu\n", 1944 trace_ocfs2_orphan_filldir((unsigned long long)OCFS2_I(iter)->ip_blkno);
2006 (unsigned long long)OCFS2_I(iter)->ip_blkno);
2007 /* No locking is required for the next_orphan queue as there 1945 /* No locking is required for the next_orphan queue as there
2008 * is only ever a single process doing orphan recovery. */ 1946 * is only ever a single process doing orphan recovery. */
2009 OCFS2_I(iter)->ip_next_orphan = p->head; 1947 OCFS2_I(iter)->ip_next_orphan = p->head;
@@ -2119,7 +2057,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
2119 struct inode *iter; 2057 struct inode *iter;
2120 struct ocfs2_inode_info *oi; 2058 struct ocfs2_inode_info *oi;
2121 2059
2122 mlog(0, "Recover inodes from orphan dir in slot %d\n", slot); 2060 trace_ocfs2_recover_orphans(slot);
2123 2061
2124 ocfs2_mark_recovering_orphan_dir(osb, slot); 2062 ocfs2_mark_recovering_orphan_dir(osb, slot);
2125 ret = ocfs2_queue_orphans(osb, slot, &inode); 2063 ret = ocfs2_queue_orphans(osb, slot, &inode);
@@ -2132,7 +2070,8 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
2132 2070
2133 while (inode) { 2071 while (inode) {
2134 oi = OCFS2_I(inode); 2072 oi = OCFS2_I(inode);
2135 mlog(0, "iput orphan %llu\n", (unsigned long long)oi->ip_blkno); 2073 trace_ocfs2_recover_orphans_iput(
2074 (unsigned long long)oi->ip_blkno);
2136 2075
2137 iter = oi->ip_next_orphan; 2076 iter = oi->ip_next_orphan;
2138 2077
@@ -2170,6 +2109,7 @@ static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota)
2170 * MOUNTED flag, but this is set right before 2109 * MOUNTED flag, but this is set right before
2171 * dismount_volume() so we can trust it. */ 2110 * dismount_volume() so we can trust it. */
2172 if (atomic_read(&osb->vol_state) == VOLUME_DISABLED) { 2111 if (atomic_read(&osb->vol_state) == VOLUME_DISABLED) {
2112 trace_ocfs2_wait_on_mount(VOLUME_DISABLED);
2173 mlog(0, "mount error, exiting!\n"); 2113 mlog(0, "mount error, exiting!\n");
2174 return -EBUSY; 2114 return -EBUSY;
2175 } 2115 }
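Many of the journal.c conversions above log a single integer (a slot, a transaction count, a status), so ocfs2_trace.h can define one event class and stamp out the individual events from it instead of repeating a full TRACE_EVENT per call site. A sketch of that pattern, using the stock DECLARE_EVENT_CLASS/DEFINE_EVENT macros inside the same header scaffolding shown earlier; the class and helper names are illustrative:

    DECLARE_EVENT_CLASS(ocfs2__int,
            TP_PROTO(int num),
            TP_ARGS(num),
            TP_STRUCT__entry(
                    __field(int, num)
            ),
            TP_fast_assign(
                    __entry->num = num;
            ),
            TP_printk("%d", __entry->num)
    );

    #define DEFINE_OCFS2_INT_EVENT(name)            \
    DEFINE_EVENT(ocfs2__int, name,                  \
            TP_PROTO(int num),                      \
            TP_ARGS(num))

    DEFINE_OCFS2_INT_EVENT(ocfs2_journal_shutdown);
    DEFINE_OCFS2_INT_EVENT(ocfs2_recover_orphans);

Once built in, each event appears under /sys/kernel/debug/tracing/events/ocfs2/ (the path depends on where tracefs/debugfs is mounted) and can be enabled independently, which is what replaces the old per-file MLOG_MASK_PREFIX gating removed at the top of each file.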
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 6180da1e37e6..68cf2f6d3c6a 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -215,7 +215,7 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
215 /* WARNING: This only kicks off a single 215 /* WARNING: This only kicks off a single
216 * checkpoint. If someone races you and adds more 216 * checkpoint. If someone races you and adds more
217 * metadata to the journal, you won't know, and will 217 * metadata to the journal, you won't know, and will
218 * wind up waiting *alot* longer than necessary. Right 218 * wind up waiting *a lot* longer than necessary. Right
219 * now we only use this in clear_inode so that's 219 * now we only use this in clear_inode so that's
220 * OK. */ 220 * OK. */
221 ocfs2_start_checkpoint(osb); 221 ocfs2_start_checkpoint(osb);
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index ec6adbf8f551..210c35237548 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -29,7 +29,6 @@
29#include <linux/highmem.h> 29#include <linux/highmem.h>
30#include <linux/bitops.h> 30#include <linux/bitops.h>
31 31
32#define MLOG_MASK_PREFIX ML_DISK_ALLOC
33#include <cluster/masklog.h> 32#include <cluster/masklog.h>
34 33
35#include "ocfs2.h" 34#include "ocfs2.h"
@@ -43,6 +42,7 @@
43#include "suballoc.h" 42#include "suballoc.h"
44#include "super.h" 43#include "super.h"
45#include "sysfile.h" 44#include "sysfile.h"
45#include "ocfs2_trace.h"
46 46
47#include "buffer_head_io.h" 47#include "buffer_head_io.h"
48 48
@@ -201,8 +201,7 @@ void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb)
201 la_max_mb = ocfs2_clusters_to_megabytes(sb, 201 la_max_mb = ocfs2_clusters_to_megabytes(sb,
202 ocfs2_local_alloc_size(sb) * 8); 202 ocfs2_local_alloc_size(sb) * 8);
203 203
204 mlog(0, "requested: %dM, max: %uM, default: %uM\n", 204 trace_ocfs2_la_set_sizes(requested_mb, la_max_mb, la_default_mb);
205 requested_mb, la_max_mb, la_default_mb);
206 205
207 if (requested_mb == -1) { 206 if (requested_mb == -1) {
208 /* No user request - use defaults */ 207 /* No user request - use defaults */
@@ -276,8 +275,8 @@ int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits)
276 275
277 ret = 1; 276 ret = 1;
278bail: 277bail:
279 mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n", 278 trace_ocfs2_alloc_should_use_local(
280 osb->local_alloc_state, (unsigned long long)bits, la_bits, ret); 279 (unsigned long long)bits, osb->local_alloc_state, la_bits, ret);
281 spin_unlock(&osb->osb_lock); 280 spin_unlock(&osb->osb_lock);
282 return ret; 281 return ret;
283} 282}
@@ -291,8 +290,6 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
291 struct inode *inode = NULL; 290 struct inode *inode = NULL;
292 struct ocfs2_local_alloc *la; 291 struct ocfs2_local_alloc *la;
293 292
294 mlog_entry_void();
295
296 if (osb->local_alloc_bits == 0) 293 if (osb->local_alloc_bits == 0)
297 goto bail; 294 goto bail;
298 295
@@ -364,9 +361,10 @@ bail:
364 if (inode) 361 if (inode)
365 iput(inode); 362 iput(inode);
366 363
367 mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits); 364 trace_ocfs2_load_local_alloc(osb->local_alloc_bits);
368 365
369 mlog_exit(status); 366 if (status)
367 mlog_errno(status);
370 return status; 368 return status;
371} 369}
372 370
@@ -388,8 +386,6 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
388 struct ocfs2_dinode *alloc_copy = NULL; 386 struct ocfs2_dinode *alloc_copy = NULL;
389 struct ocfs2_dinode *alloc = NULL; 387 struct ocfs2_dinode *alloc = NULL;
390 388
391 mlog_entry_void();
392
393 cancel_delayed_work(&osb->la_enable_wq); 389 cancel_delayed_work(&osb->la_enable_wq);
394 flush_workqueue(ocfs2_wq); 390 flush_workqueue(ocfs2_wq);
395 391
@@ -482,8 +478,6 @@ out:
482 478
483 if (alloc_copy) 479 if (alloc_copy)
484 kfree(alloc_copy); 480 kfree(alloc_copy);
485
486 mlog_exit_void();
487} 481}
488 482
489/* 483/*
@@ -502,7 +496,7 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
502 struct inode *inode = NULL; 496 struct inode *inode = NULL;
503 struct ocfs2_dinode *alloc; 497 struct ocfs2_dinode *alloc;
504 498
505 mlog_entry("(slot_num = %d)\n", slot_num); 499 trace_ocfs2_begin_local_alloc_recovery(slot_num);
506 500
507 *alloc_copy = NULL; 501 *alloc_copy = NULL;
508 502
@@ -552,7 +546,8 @@ bail:
552 iput(inode); 546 iput(inode);
553 } 547 }
554 548
555 mlog_exit(status); 549 if (status)
550 mlog_errno(status);
556 return status; 551 return status;
557} 552}
558 553
@@ -570,8 +565,6 @@ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
570 struct buffer_head *main_bm_bh = NULL; 565 struct buffer_head *main_bm_bh = NULL;
571 struct inode *main_bm_inode; 566 struct inode *main_bm_inode;
572 567
573 mlog_entry_void();
574
575 main_bm_inode = ocfs2_get_system_file_inode(osb, 568 main_bm_inode = ocfs2_get_system_file_inode(osb,
576 GLOBAL_BITMAP_SYSTEM_INODE, 569 GLOBAL_BITMAP_SYSTEM_INODE,
577 OCFS2_INVALID_SLOT); 570 OCFS2_INVALID_SLOT);
@@ -620,7 +613,8 @@ out_mutex:
620out: 613out:
621 if (!status) 614 if (!status)
622 ocfs2_init_steal_slots(osb); 615 ocfs2_init_steal_slots(osb);
623 mlog_exit(status); 616 if (status)
617 mlog_errno(status);
624 return status; 618 return status;
625} 619}
626 620
@@ -640,8 +634,6 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
640 struct inode *local_alloc_inode; 634 struct inode *local_alloc_inode;
641 unsigned int free_bits; 635 unsigned int free_bits;
642 636
643 mlog_entry_void();
644
645 BUG_ON(!ac); 637 BUG_ON(!ac);
646 638
647 local_alloc_inode = 639 local_alloc_inode =
@@ -712,10 +704,6 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
712 goto bail; 704 goto bail;
713 } 705 }
714 706
715 if (ac->ac_max_block)
716 mlog(0, "Calling in_range for max block %llu\n",
717 (unsigned long long)ac->ac_max_block);
718
719 ac->ac_inode = local_alloc_inode; 707 ac->ac_inode = local_alloc_inode;
720 /* We should never use localalloc from another slot */ 708 /* We should never use localalloc from another slot */
721 ac->ac_alloc_slot = osb->slot_num; 709 ac->ac_alloc_slot = osb->slot_num;
@@ -729,10 +717,12 @@ bail:
729 iput(local_alloc_inode); 717 iput(local_alloc_inode);
730 } 718 }
731 719
732 mlog(0, "bits=%d, slot=%d, ret=%d\n", bits_wanted, osb->slot_num, 720 trace_ocfs2_reserve_local_alloc_bits(
733 status); 721 (unsigned long long)ac->ac_max_block,
722 bits_wanted, osb->slot_num, status);
734 723
735 mlog_exit(status); 724 if (status)
725 mlog_errno(status);
736 return status; 726 return status;
737} 727}
738 728
@@ -749,7 +739,6 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
749 struct ocfs2_dinode *alloc; 739 struct ocfs2_dinode *alloc;
750 struct ocfs2_local_alloc *la; 740 struct ocfs2_local_alloc *la;
751 741
752 mlog_entry_void();
753 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL); 742 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);
754 743
755 local_alloc_inode = ac->ac_inode; 744 local_alloc_inode = ac->ac_inode;
@@ -788,7 +777,8 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
788 ocfs2_journal_dirty(handle, osb->local_alloc_bh); 777 ocfs2_journal_dirty(handle, osb->local_alloc_bh);
789 778
790bail: 779bail:
791 mlog_exit(status); 780 if (status)
781 mlog_errno(status);
792 return status; 782 return status;
793} 783}
794 784
@@ -799,13 +789,11 @@ static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
799 u32 count = 0; 789 u32 count = 0;
800 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 790 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
801 791
802 mlog_entry_void();
803
804 buffer = la->la_bitmap; 792 buffer = la->la_bitmap;
805 for (i = 0; i < le16_to_cpu(la->la_size); i++) 793 for (i = 0; i < le16_to_cpu(la->la_size); i++)
806 count += hweight8(buffer[i]); 794 count += hweight8(buffer[i]);
807 795
808 mlog_exit(count); 796 trace_ocfs2_local_alloc_count_bits(count);
809 return count; 797 return count;
810} 798}
811 799
@@ -820,10 +808,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
820 void *bitmap = NULL; 808 void *bitmap = NULL;
821 struct ocfs2_reservation_map *resmap = &osb->osb_la_resmap; 809 struct ocfs2_reservation_map *resmap = &osb->osb_la_resmap;
822 810
823 mlog_entry("(numbits wanted = %u)\n", *numbits);
824
825 if (!alloc->id1.bitmap1.i_total) { 811 if (!alloc->id1.bitmap1.i_total) {
826 mlog(0, "No bits in my window!\n");
827 bitoff = -1; 812 bitoff = -1;
828 goto bail; 813 goto bail;
829 } 814 }
@@ -883,8 +868,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
883 } 868 }
884 } 869 }
885 870
886 mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff, 871 trace_ocfs2_local_alloc_find_clear_bits_search_bitmap(bitoff, numfound);
887 numfound);
888 872
889 if (numfound == *numbits) 873 if (numfound == *numbits)
890 bitoff = startoff - numfound; 874 bitoff = startoff - numfound;
@@ -895,7 +879,10 @@ bail:
895 if (local_resv) 879 if (local_resv)
896 ocfs2_resv_discard(resmap, resv); 880 ocfs2_resv_discard(resmap, resv);
897 881
898 mlog_exit(bitoff); 882 trace_ocfs2_local_alloc_find_clear_bits(*numbits,
883 le32_to_cpu(alloc->id1.bitmap1.i_total),
884 bitoff, numfound);
885
899 return bitoff; 886 return bitoff;
900} 887}
901 888
@@ -903,15 +890,12 @@ static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc)
903{ 890{
904 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 891 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
905 int i; 892 int i;
906 mlog_entry_void();
907 893
908 alloc->id1.bitmap1.i_total = 0; 894 alloc->id1.bitmap1.i_total = 0;
909 alloc->id1.bitmap1.i_used = 0; 895 alloc->id1.bitmap1.i_used = 0;
910 la->la_bm_off = 0; 896 la->la_bm_off = 0;
911 for(i = 0; i < le16_to_cpu(la->la_size); i++) 897 for(i = 0; i < le16_to_cpu(la->la_size); i++)
912 la->la_bitmap[i] = 0; 898 la->la_bitmap[i] = 0;
913
914 mlog_exit_void();
915} 899}
916 900
917#if 0 901#if 0
@@ -952,18 +936,16 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
952 void *bitmap; 936 void *bitmap;
953 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 937 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
954 938
955 mlog_entry("total = %u, used = %u\n", 939 trace_ocfs2_sync_local_to_main(
956 le32_to_cpu(alloc->id1.bitmap1.i_total), 940 le32_to_cpu(alloc->id1.bitmap1.i_total),
957 le32_to_cpu(alloc->id1.bitmap1.i_used)); 941 le32_to_cpu(alloc->id1.bitmap1.i_used));
958 942
959 if (!alloc->id1.bitmap1.i_total) { 943 if (!alloc->id1.bitmap1.i_total) {
960 mlog(0, "nothing to sync!\n");
961 goto bail; 944 goto bail;
962 } 945 }
963 946
964 if (le32_to_cpu(alloc->id1.bitmap1.i_used) == 947 if (le32_to_cpu(alloc->id1.bitmap1.i_used) ==
965 le32_to_cpu(alloc->id1.bitmap1.i_total)) { 948 le32_to_cpu(alloc->id1.bitmap1.i_total)) {
966 mlog(0, "all bits were taken!\n");
967 goto bail; 949 goto bail;
968 } 950 }
969 951
@@ -985,8 +967,7 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
985 ocfs2_clusters_to_blocks(osb->sb, 967 ocfs2_clusters_to_blocks(osb->sb,
986 start - count); 968 start - count);
987 969
988 mlog(0, "freeing %u bits starting at local alloc bit " 970 trace_ocfs2_sync_local_to_main_free(
989 "%u (la_start_blk = %llu, blkno = %llu)\n",
990 count, start - count, 971 count, start - count,
991 (unsigned long long)la_start_blk, 972 (unsigned long long)la_start_blk,
992 (unsigned long long)blkno); 973 (unsigned long long)blkno);
@@ -1007,7 +988,8 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
1007 } 988 }
1008 989
1009bail: 990bail:
1010 mlog_exit(status); 991 if (status)
992 mlog_errno(status);
1011 return status; 993 return status;
1012} 994}
1013 995
@@ -1132,7 +1114,8 @@ bail:
1132 *ac = NULL; 1114 *ac = NULL;
1133 } 1115 }
1134 1116
1135 mlog_exit(status); 1117 if (status)
1118 mlog_errno(status);
1136 return status; 1119 return status;
1137} 1120}
1138 1121
@@ -1148,17 +1131,12 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
1148 struct ocfs2_dinode *alloc = NULL; 1131 struct ocfs2_dinode *alloc = NULL;
1149 struct ocfs2_local_alloc *la; 1132 struct ocfs2_local_alloc *la;
1150 1133
1151 mlog_entry_void();
1152
1153 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 1134 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
1154 la = OCFS2_LOCAL_ALLOC(alloc); 1135 la = OCFS2_LOCAL_ALLOC(alloc);
1155 1136
1156 if (alloc->id1.bitmap1.i_total) 1137 trace_ocfs2_local_alloc_new_window(
1157 mlog(0, "asking me to alloc a new window over a non-empty " 1138 le32_to_cpu(alloc->id1.bitmap1.i_total),
1158 "one\n"); 1139 osb->local_alloc_bits);
1159
1160 mlog(0, "Allocating %u clusters for a new window.\n",
1161 osb->local_alloc_bits);
1162 1140
1163 /* Instruct the allocation code to try the most recently used 1141 /* Instruct the allocation code to try the most recently used
1164 * cluster group. We'll re-record the group used this pass 1142 * cluster group. We'll re-record the group used this pass
@@ -1220,13 +1198,13 @@ retry_enospc:
1220 ocfs2_resmap_restart(&osb->osb_la_resmap, cluster_count, 1198 ocfs2_resmap_restart(&osb->osb_la_resmap, cluster_count,
1221 OCFS2_LOCAL_ALLOC(alloc)->la_bitmap); 1199 OCFS2_LOCAL_ALLOC(alloc)->la_bitmap);
1222 1200
1223 mlog(0, "New window allocated:\n"); 1201 trace_ocfs2_local_alloc_new_window_result(
1224 mlog(0, "window la_bm_off = %u\n", 1202 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off,
1225 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off); 1203 le32_to_cpu(alloc->id1.bitmap1.i_total));
1226 mlog(0, "window bits = %u\n", le32_to_cpu(alloc->id1.bitmap1.i_total));
1227 1204
1228bail: 1205bail:
1229 mlog_exit(status); 1206 if (status)
1207 mlog_errno(status);
1230 return status; 1208 return status;
1231} 1209}
1232 1210
@@ -1243,8 +1221,6 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
1243 struct ocfs2_dinode *alloc_copy = NULL; 1221 struct ocfs2_dinode *alloc_copy = NULL;
1244 struct ocfs2_alloc_context *ac = NULL; 1222 struct ocfs2_alloc_context *ac = NULL;
1245 1223
1246 mlog_entry_void();
1247
1248 ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE); 1224 ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE);
1249 1225
1250 /* This will lock the main bitmap for us. */ 1226 /* This will lock the main bitmap for us. */
@@ -1324,7 +1300,8 @@ bail:
1324 if (ac) 1300 if (ac)
1325 ocfs2_free_alloc_context(ac); 1301 ocfs2_free_alloc_context(ac);
1326 1302
1327 mlog_exit(status); 1303 if (status)
1304 mlog_errno(status);
1328 return status; 1305 return status;
1329} 1306}
1330 1307
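
Every localalloc.c hunk above applies the same mechanical conversion, so a compact before/after sketch may be useful. This is a minimal sketch assuming hypothetical names (ocfs2_foo, do_work, trace_ocfs2_foo); it is not code from the patch itself:

    /* Before: masklog entry/exit markers on every path. */
    int ocfs2_foo(struct ocfs2_super *osb)
    {
            int status;

            mlog_entry_void();
            status = do_work(osb);          /* hypothetical helper */
            mlog_exit(status);
            return status;
    }

    /* After: a tracepoint where the arguments carry information,
     * and mlog_errno() only when something actually failed. */
    int ocfs2_foo(struct ocfs2_super *osb)
    {
            int status;

            status = do_work(osb);
            trace_ocfs2_foo(status);        /* hypothetical event */
            if (status)
                    mlog_errno(status);
            return status;
    }
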
diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c
index b5cb3ede9408..e57c804069ea 100644
--- a/fs/ocfs2/locks.c
+++ b/fs/ocfs2/locks.c
@@ -26,7 +26,6 @@
26#include <linux/fs.h> 26#include <linux/fs.h>
27#include <linux/fcntl.h> 27#include <linux/fcntl.h>
28 28
29#define MLOG_MASK_PREFIX ML_INODE
30#include <cluster/masklog.h> 29#include <cluster/masklog.h>
31 30
32#include "ocfs2.h" 31#include "ocfs2.h"
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 7e32db9c2c99..3e9393ca39eb 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -31,7 +31,6 @@
31#include <linux/signal.h> 31#include <linux/signal.h>
32#include <linux/rbtree.h> 32#include <linux/rbtree.h>
33 33
34#define MLOG_MASK_PREFIX ML_FILE_IO
35#include <cluster/masklog.h> 34#include <cluster/masklog.h>
36 35
37#include "ocfs2.h" 36#include "ocfs2.h"
@@ -42,6 +41,7 @@
42#include "inode.h" 41#include "inode.h"
43#include "mmap.h" 42#include "mmap.h"
44#include "super.h" 43#include "super.h"
44#include "ocfs2_trace.h"
45 45
46 46
47static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf) 47static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf)
@@ -49,13 +49,12 @@ static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf)
49 sigset_t oldset; 49 sigset_t oldset;
50 int ret; 50 int ret;
51 51
52 mlog_entry("(area=%p, page offset=%lu)\n", area, vmf->pgoff);
53
54 ocfs2_block_signals(&oldset); 52 ocfs2_block_signals(&oldset);
55 ret = filemap_fault(area, vmf); 53 ret = filemap_fault(area, vmf);
56 ocfs2_unblock_signals(&oldset); 54 ocfs2_unblock_signals(&oldset);
57 55
58 mlog_exit_ptr(vmf->page); 56 trace_ocfs2_fault(OCFS2_I(area->vm_file->f_mapping->host)->ip_blkno,
57 area, vmf->page, vmf->pgoff);
59 return ret; 58 return ret;
60} 59}
61 60
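
The trace_ocfs2_fault() call above needs a matching definition in ocfs2_trace.h; the portion of that header reproduced later in this patch is truncated before the mmap events, so the following is only a sketch of the plausible shape, mirroring the TRACE_EVENT style shown there:

    TRACE_EVENT(ocfs2_fault,
            TP_PROTO(unsigned long long ip_blkno, void *area, void *page,
                     unsigned long pgoff),
            TP_ARGS(ip_blkno, area, page, pgoff),
            TP_STRUCT__entry(
                    __field(unsigned long long, ip_blkno)
                    __field(void *, area)
                    __field(void *, page)
                    __field(unsigned long, pgoff)
            ),
            TP_fast_assign(
                    __entry->ip_blkno = ip_blkno;
                    __entry->area = area;
                    __entry->page = page;
                    __entry->pgoff = pgoff;
            ),
            TP_printk("%llu %p %p %lu",
                      __entry->ip_blkno, __entry->area,
                      __entry->page, __entry->pgoff)
    );
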
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index d6c25d76b537..e5d738cd9cc0 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -42,7 +42,6 @@
42#include <linux/highmem.h> 42#include <linux/highmem.h>
43#include <linux/quotaops.h> 43#include <linux/quotaops.h>
44 44
45#define MLOG_MASK_PREFIX ML_NAMEI
46#include <cluster/masklog.h> 45#include <cluster/masklog.h>
47 46
48#include "ocfs2.h" 47#include "ocfs2.h"
@@ -63,6 +62,7 @@
63#include "uptodate.h" 62#include "uptodate.h"
64#include "xattr.h" 63#include "xattr.h"
65#include "acl.h" 64#include "acl.h"
65#include "ocfs2_trace.h"
66 66
67#include "buffer_head_io.h" 67#include "buffer_head_io.h"
68 68
@@ -106,17 +106,15 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
106 struct dentry *ret; 106 struct dentry *ret;
107 struct ocfs2_inode_info *oi; 107 struct ocfs2_inode_info *oi;
108 108
109 mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry, 109 trace_ocfs2_lookup(dir, dentry, dentry->d_name.len,
110 dentry->d_name.len, dentry->d_name.name); 110 dentry->d_name.name,
111 (unsigned long long)OCFS2_I(dir)->ip_blkno, 0);
111 112
112 if (dentry->d_name.len > OCFS2_MAX_FILENAME_LEN) { 113 if (dentry->d_name.len > OCFS2_MAX_FILENAME_LEN) {
113 ret = ERR_PTR(-ENAMETOOLONG); 114 ret = ERR_PTR(-ENAMETOOLONG);
114 goto bail; 115 goto bail;
115 } 116 }
116 117
117 mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len,
118 dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno);
119
120 status = ocfs2_inode_lock_nested(dir, NULL, 0, OI_LS_PARENT); 118 status = ocfs2_inode_lock_nested(dir, NULL, 0, OI_LS_PARENT);
121 if (status < 0) { 119 if (status < 0) {
122 if (status != -ENOENT) 120 if (status != -ENOENT)
@@ -182,7 +180,7 @@ bail_unlock:
182 180
183bail: 181bail:
184 182
185 mlog_exit_ptr(ret); 183 trace_ocfs2_lookup_ret(ret);
186 184
187 return ret; 185 return ret;
188} 186}
@@ -235,9 +233,9 @@ static int ocfs2_mknod(struct inode *dir,
235 sigset_t oldset; 233 sigset_t oldset;
236 int did_block_signals = 0; 234 int did_block_signals = 0;
237 235
238 mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode, 236 trace_ocfs2_mknod(dir, dentry, dentry->d_name.len, dentry->d_name.name,
239 (unsigned long)dev, dentry->d_name.len, 237 (unsigned long long)OCFS2_I(dir)->ip_blkno,
240 dentry->d_name.name); 238 (unsigned long)dev, mode);
241 239
242 dquot_initialize(dir); 240 dquot_initialize(dir);
243 241
@@ -354,10 +352,6 @@ static int ocfs2_mknod(struct inode *dir,
354 goto leave; 352 goto leave;
355 did_quota_inode = 1; 353 did_quota_inode = 1;
356 354
357 mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry,
358 inode->i_mode, (unsigned long)dev, dentry->d_name.len,
359 dentry->d_name.name);
360
361 /* do the real work now. */ 355 /* do the real work now. */
362 status = ocfs2_mknod_locked(osb, dir, inode, dev, 356 status = ocfs2_mknod_locked(osb, dir, inode, dev,
363 &new_fe_bh, parent_fe_bh, handle, 357 &new_fe_bh, parent_fe_bh, handle,
@@ -436,9 +430,6 @@ leave:
436 if (did_block_signals) 430 if (did_block_signals)
437 ocfs2_unblock_signals(&oldset); 431 ocfs2_unblock_signals(&oldset);
438 432
439 if (status == -ENOSPC)
440 mlog(0, "Disk is full\n");
441
442 brelse(new_fe_bh); 433 brelse(new_fe_bh);
443 brelse(parent_fe_bh); 434 brelse(parent_fe_bh);
444 kfree(si.name); 435 kfree(si.name);
@@ -466,7 +457,8 @@ leave:
466 iput(inode); 457 iput(inode);
467 } 458 }
468 459
469 mlog_exit(status); 460 if (status)
461 mlog_errno(status);
470 462
471 return status; 463 return status;
472} 464}
@@ -577,7 +569,8 @@ leave:
577 } 569 }
578 } 570 }
579 571
580 mlog_exit(status); 572 if (status)
573 mlog_errno(status);
581 return status; 574 return status;
582} 575}
583 576
@@ -615,10 +608,11 @@ static int ocfs2_mkdir(struct inode *dir,
615{ 608{
616 int ret; 609 int ret;
617 610
618 mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, mode, 611 trace_ocfs2_mkdir(dir, dentry, dentry->d_name.len, dentry->d_name.name,
619 dentry->d_name.len, dentry->d_name.name); 612 OCFS2_I(dir)->ip_blkno, mode);
620 ret = ocfs2_mknod(dir, dentry, mode | S_IFDIR, 0); 613 ret = ocfs2_mknod(dir, dentry, mode | S_IFDIR, 0);
621 mlog_exit(ret); 614 if (ret)
615 mlog_errno(ret);
622 616
623 return ret; 617 return ret;
624} 618}
@@ -630,10 +624,11 @@ static int ocfs2_create(struct inode *dir,
630{ 624{
631 int ret; 625 int ret;
632 626
633 mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, mode, 627 trace_ocfs2_create(dir, dentry, dentry->d_name.len, dentry->d_name.name,
634 dentry->d_name.len, dentry->d_name.name); 628 (unsigned long long)OCFS2_I(dir)->ip_blkno, mode);
635 ret = ocfs2_mknod(dir, dentry, mode | S_IFREG, 0); 629 ret = ocfs2_mknod(dir, dentry, mode | S_IFREG, 0);
636 mlog_exit(ret); 630 if (ret)
631 mlog_errno(ret);
637 632
638 return ret; 633 return ret;
639} 634}
@@ -652,9 +647,9 @@ static int ocfs2_link(struct dentry *old_dentry,
652 struct ocfs2_dir_lookup_result lookup = { NULL, }; 647 struct ocfs2_dir_lookup_result lookup = { NULL, };
653 sigset_t oldset; 648 sigset_t oldset;
654 649
655 mlog_entry("(inode=%lu, old='%.*s' new='%.*s')\n", inode->i_ino, 650 trace_ocfs2_link((unsigned long long)OCFS2_I(inode)->ip_blkno,
656 old_dentry->d_name.len, old_dentry->d_name.name, 651 old_dentry->d_name.len, old_dentry->d_name.name,
657 dentry->d_name.len, dentry->d_name.name); 652 dentry->d_name.len, dentry->d_name.name);
658 653
659 if (S_ISDIR(inode->i_mode)) 654 if (S_ISDIR(inode->i_mode))
660 return -EPERM; 655 return -EPERM;
@@ -757,7 +752,8 @@ out:
757 752
758 ocfs2_free_dir_lookup_result(&lookup); 753 ocfs2_free_dir_lookup_result(&lookup);
759 754
760 mlog_exit(err); 755 if (err)
756 mlog_errno(err);
761 757
762 return err; 758 return err;
763} 759}
@@ -809,19 +805,17 @@ static int ocfs2_unlink(struct inode *dir,
809 struct ocfs2_dir_lookup_result lookup = { NULL, }; 805 struct ocfs2_dir_lookup_result lookup = { NULL, };
810 struct ocfs2_dir_lookup_result orphan_insert = { NULL, }; 806 struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
811 807
812 mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry, 808 trace_ocfs2_unlink(dir, dentry, dentry->d_name.len,
813 dentry->d_name.len, dentry->d_name.name); 809 dentry->d_name.name,
810 (unsigned long long)OCFS2_I(dir)->ip_blkno,
811 (unsigned long long)OCFS2_I(inode)->ip_blkno);
814 812
815 dquot_initialize(dir); 813 dquot_initialize(dir);
816 814
817 BUG_ON(dentry->d_parent->d_inode != dir); 815 BUG_ON(dentry->d_parent->d_inode != dir);
818 816
819 mlog(0, "ino = %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); 817 if (inode == osb->root_inode)
820
821 if (inode == osb->root_inode) {
822 mlog(0, "Cannot delete the root directory\n");
823 return -EPERM; 818 return -EPERM;
824 }
825 819
826 status = ocfs2_inode_lock_nested(dir, &parent_node_bh, 1, 820 status = ocfs2_inode_lock_nested(dir, &parent_node_bh, 1,
827 OI_LS_PARENT); 821 OI_LS_PARENT);
@@ -843,9 +837,10 @@ static int ocfs2_unlink(struct inode *dir,
843 if (OCFS2_I(inode)->ip_blkno != blkno) { 837 if (OCFS2_I(inode)->ip_blkno != blkno) {
844 status = -ENOENT; 838 status = -ENOENT;
845 839
846 mlog(0, "ip_blkno %llu != dirent blkno %llu ip_flags = %x\n", 840 trace_ocfs2_unlink_noent(
847 (unsigned long long)OCFS2_I(inode)->ip_blkno, 841 (unsigned long long)OCFS2_I(inode)->ip_blkno,
848 (unsigned long long)blkno, OCFS2_I(inode)->ip_flags); 842 (unsigned long long)blkno,
843 OCFS2_I(inode)->ip_flags);
849 goto leave; 844 goto leave;
850 } 845 }
851 846
@@ -954,7 +949,8 @@ leave:
954 ocfs2_free_dir_lookup_result(&orphan_insert); 949 ocfs2_free_dir_lookup_result(&orphan_insert);
955 ocfs2_free_dir_lookup_result(&lookup); 950 ocfs2_free_dir_lookup_result(&lookup);
956 951
957 mlog_exit(status); 952 if (status)
953 mlog_errno(status);
958 954
959 return status; 955 return status;
960} 956}
@@ -975,9 +971,8 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
975 struct buffer_head **tmpbh; 971 struct buffer_head **tmpbh;
976 struct inode *tmpinode; 972 struct inode *tmpinode;
977 973
978 mlog_entry("(inode1 = %llu, inode2 = %llu)\n", 974 trace_ocfs2_double_lock((unsigned long long)oi1->ip_blkno,
979 (unsigned long long)oi1->ip_blkno, 975 (unsigned long long)oi2->ip_blkno);
980 (unsigned long long)oi2->ip_blkno);
981 976
982 if (*bh1) 977 if (*bh1)
983 *bh1 = NULL; 978 *bh1 = NULL;
@@ -988,7 +983,6 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
988 if (oi1->ip_blkno != oi2->ip_blkno) { 983 if (oi1->ip_blkno != oi2->ip_blkno) {
989 if (oi1->ip_blkno < oi2->ip_blkno) { 984 if (oi1->ip_blkno < oi2->ip_blkno) {
990 /* switch id1 and id2 around */ 985 /* switch id1 and id2 around */
991 mlog(0, "switching them around...\n");
992 tmpbh = bh2; 986 tmpbh = bh2;
993 bh2 = bh1; 987 bh2 = bh1;
994 bh1 = tmpbh; 988 bh1 = tmpbh;
@@ -1024,8 +1018,13 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
1024 mlog_errno(status); 1018 mlog_errno(status);
1025 } 1019 }
1026 1020
1021 trace_ocfs2_double_lock_end(
1022 (unsigned long long)OCFS2_I(inode1)->ip_blkno,
1023 (unsigned long long)OCFS2_I(inode2)->ip_blkno);
1024
1027bail: 1025bail:
1028 mlog_exit(status); 1026 if (status)
1027 mlog_errno(status);
1029 return status; 1028 return status;
1030} 1029}
1031 1030
@@ -1067,10 +1066,9 @@ static int ocfs2_rename(struct inode *old_dir,
1067 /* At some point it might be nice to break this function up a 1066 /* At some point it might be nice to break this function up a
1068 * bit. */ 1067 * bit. */
1069 1068
1070 mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p, from='%.*s' to='%.*s')\n", 1069 trace_ocfs2_rename(old_dir, old_dentry, new_dir, new_dentry,
1071 old_dir, old_dentry, new_dir, new_dentry, 1070 old_dentry->d_name.len, old_dentry->d_name.name,
1072 old_dentry->d_name.len, old_dentry->d_name.name, 1071 new_dentry->d_name.len, new_dentry->d_name.name);
1073 new_dentry->d_name.len, new_dentry->d_name.name);
1074 1072
1075 dquot_initialize(old_dir); 1073 dquot_initialize(old_dir);
1076 dquot_initialize(new_dir); 1074 dquot_initialize(new_dir);
@@ -1227,16 +1225,15 @@ static int ocfs2_rename(struct inode *old_dir,
1227 if (!new_inode) { 1225 if (!new_inode) {
1228 status = -EACCES; 1226 status = -EACCES;
1229 1227
1230 mlog(0, "We found an inode for name %.*s but VFS " 1228 trace_ocfs2_rename_target_exists(new_dentry->d_name.len,
1231 "didn't give us one.\n", new_dentry->d_name.len, 1229 new_dentry->d_name.name);
1232 new_dentry->d_name.name);
1233 goto bail; 1230 goto bail;
1234 } 1231 }
1235 1232
1236 if (OCFS2_I(new_inode)->ip_blkno != newfe_blkno) { 1233 if (OCFS2_I(new_inode)->ip_blkno != newfe_blkno) {
1237 status = -EACCES; 1234 status = -EACCES;
1238 1235
1239 mlog(0, "Inode %llu and dir %llu disagree. flags = %x\n", 1236 trace_ocfs2_rename_disagree(
1240 (unsigned long long)OCFS2_I(new_inode)->ip_blkno, 1237 (unsigned long long)OCFS2_I(new_inode)->ip_blkno,
1241 (unsigned long long)newfe_blkno, 1238 (unsigned long long)newfe_blkno,
1242 OCFS2_I(new_inode)->ip_flags); 1239 OCFS2_I(new_inode)->ip_flags);
@@ -1259,8 +1256,7 @@ static int ocfs2_rename(struct inode *old_dir,
1259 1256
1260 newfe = (struct ocfs2_dinode *) newfe_bh->b_data; 1257 newfe = (struct ocfs2_dinode *) newfe_bh->b_data;
1261 1258
1262 mlog(0, "aha rename over existing... new_blkno=%llu " 1259 trace_ocfs2_rename_over_existing(
1263 "newfebh=%p bhblocknr=%llu\n",
1264 (unsigned long long)newfe_blkno, newfe_bh, newfe_bh ? 1260 (unsigned long long)newfe_blkno, newfe_bh, newfe_bh ?
1265 (unsigned long long)newfe_bh->b_blocknr : 0ULL); 1261 (unsigned long long)newfe_bh->b_blocknr : 0ULL);
1266 1262
@@ -1476,7 +1472,8 @@ bail:
1476 brelse(old_dir_bh); 1472 brelse(old_dir_bh);
1477 brelse(new_dir_bh); 1473 brelse(new_dir_bh);
1478 1474
1479 mlog_exit(status); 1475 if (status)
1476 mlog_errno(status);
1480 1477
1481 return status; 1478 return status;
1482} 1479}
@@ -1501,9 +1498,8 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
1501 * write i_size + 1 bytes. */ 1498 * write i_size + 1 bytes. */
1502 blocks = (bytes_left + sb->s_blocksize - 1) >> sb->s_blocksize_bits; 1499 blocks = (bytes_left + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
1503 1500
1504 mlog_entry("i_blocks = %llu, i_size = %llu, blocks = %d\n", 1501 trace_ocfs2_create_symlink_data((unsigned long long)inode->i_blocks,
1505 (unsigned long long)inode->i_blocks, 1502 i_size_read(inode), blocks);
1506 i_size_read(inode), blocks);
1507 1503
1508 /* Sanity check -- make sure we're going to fit. */ 1504 /* Sanity check -- make sure we're going to fit. */
1509 if (bytes_left > 1505 if (bytes_left >
@@ -1579,7 +1575,8 @@ bail:
1579 kfree(bhs); 1575 kfree(bhs);
1580 } 1576 }
1581 1577
1582 mlog_exit(status); 1578 if (status)
1579 mlog_errno(status);
1583 return status; 1580 return status;
1584} 1581}
1585 1582
@@ -1610,8 +1607,8 @@ static int ocfs2_symlink(struct inode *dir,
1610 sigset_t oldset; 1607 sigset_t oldset;
1611 int did_block_signals = 0; 1608 int did_block_signals = 0;
1612 1609
1613 mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir, 1610 trace_ocfs2_symlink_begin(dir, dentry, symname,
1614 dentry, symname, dentry->d_name.len, dentry->d_name.name); 1611 dentry->d_name.len, dentry->d_name.name);
1615 1612
1616 dquot_initialize(dir); 1613 dquot_initialize(dir);
1617 1614
@@ -1713,9 +1710,10 @@ static int ocfs2_symlink(struct inode *dir,
1713 goto bail; 1710 goto bail;
1714 did_quota_inode = 1; 1711 did_quota_inode = 1;
1715 1712
1716 mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, 1713 trace_ocfs2_symlink_create(dir, dentry, dentry->d_name.len,
1717 inode->i_mode, dentry->d_name.len, 1714 dentry->d_name.name,
1718 dentry->d_name.name); 1715 (unsigned long long)OCFS2_I(dir)->ip_blkno,
1716 inode->i_mode);
1719 1717
1720 status = ocfs2_mknod_locked(osb, dir, inode, 1718 status = ocfs2_mknod_locked(osb, dir, inode,
1721 0, &new_fe_bh, parent_fe_bh, handle, 1719 0, &new_fe_bh, parent_fe_bh, handle,
@@ -1835,7 +1833,8 @@ bail:
1835 iput(inode); 1833 iput(inode);
1836 } 1834 }
1837 1835
1838 mlog_exit(status); 1836 if (status)
1837 mlog_errno(status);
1839 1838
1840 return status; 1839 return status;
1841} 1840}
@@ -1844,8 +1843,6 @@ static int ocfs2_blkno_stringify(u64 blkno, char *name)
1844{ 1843{
1845 int status, namelen; 1844 int status, namelen;
1846 1845
1847 mlog_entry_void();
1848
1849 namelen = snprintf(name, OCFS2_ORPHAN_NAMELEN + 1, "%016llx", 1846 namelen = snprintf(name, OCFS2_ORPHAN_NAMELEN + 1, "%016llx",
1850 (long long)blkno); 1847 (long long)blkno);
1851 if (namelen <= 0) { 1848 if (namelen <= 0) {
@@ -1862,12 +1859,12 @@ static int ocfs2_blkno_stringify(u64 blkno, char *name)
1862 goto bail; 1859 goto bail;
1863 } 1860 }
1864 1861
1865 mlog(0, "built filename '%s' for orphan dir (len=%d)\n", name, 1862 trace_ocfs2_blkno_stringify(blkno, name, namelen);
1866 namelen);
1867 1863
1868 status = 0; 1864 status = 0;
1869bail: 1865bail:
1870 mlog_exit(status); 1866 if (status < 0)
1867 mlog_errno(status);
1871 return status; 1868 return status;
1872} 1869}
1873 1870
@@ -1980,7 +1977,8 @@ out:
1980 iput(orphan_dir_inode); 1977 iput(orphan_dir_inode);
1981 } 1978 }
1982 1979
1983 mlog_exit(ret); 1980 if (ret)
1981 mlog_errno(ret);
1984 return ret; 1982 return ret;
1985} 1983}
1986 1984
@@ -1997,7 +1995,8 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
1997 struct ocfs2_dinode *orphan_fe; 1995 struct ocfs2_dinode *orphan_fe;
1998 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; 1996 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
1999 1997
2000 mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); 1998 trace_ocfs2_orphan_add_begin(
1999 (unsigned long long)OCFS2_I(inode)->ip_blkno);
2001 2000
2002 status = ocfs2_read_inode_block(orphan_dir_inode, &orphan_dir_bh); 2001 status = ocfs2_read_inode_block(orphan_dir_inode, &orphan_dir_bh);
2003 if (status < 0) { 2002 if (status < 0) {
@@ -2056,13 +2055,14 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
2056 2055
2057 ocfs2_journal_dirty(handle, fe_bh); 2056 ocfs2_journal_dirty(handle, fe_bh);
2058 2057
2059 mlog(0, "Inode %llu orphaned in slot %d\n", 2058 trace_ocfs2_orphan_add_end((unsigned long long)OCFS2_I(inode)->ip_blkno,
2060 (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num); 2059 osb->slot_num);
2061 2060
2062leave: 2061leave:
2063 brelse(orphan_dir_bh); 2062 brelse(orphan_dir_bh);
2064 2063
2065 mlog_exit(status); 2064 if (status)
2065 mlog_errno(status);
2066 return status; 2066 return status;
2067} 2067}
2068 2068
@@ -2078,17 +2078,15 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
2078 int status = 0; 2078 int status = 0;
2079 struct ocfs2_dir_lookup_result lookup = { NULL, }; 2079 struct ocfs2_dir_lookup_result lookup = { NULL, };
2080 2080
2081 mlog_entry_void();
2082
2083 status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name); 2081 status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name);
2084 if (status < 0) { 2082 if (status < 0) {
2085 mlog_errno(status); 2083 mlog_errno(status);
2086 goto leave; 2084 goto leave;
2087 } 2085 }
2088 2086
2089 mlog(0, "removing '%s' from orphan dir %llu (namelen=%d)\n", 2087 trace_ocfs2_orphan_del(
2090 name, (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno, 2088 (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno,
2091 OCFS2_ORPHAN_NAMELEN); 2089 name, OCFS2_ORPHAN_NAMELEN);
2092 2090
2093 /* find its spot in the orphan directory */ 2091 /* find its spot in the orphan directory */
2094 status = ocfs2_find_entry(name, OCFS2_ORPHAN_NAMELEN, orphan_dir_inode, 2092 status = ocfs2_find_entry(name, OCFS2_ORPHAN_NAMELEN, orphan_dir_inode,
@@ -2124,12 +2122,13 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
2124leave: 2122leave:
2125 ocfs2_free_dir_lookup_result(&lookup); 2123 ocfs2_free_dir_lookup_result(&lookup);
2126 2124
2127 mlog_exit(status); 2125 if (status)
2126 mlog_errno(status);
2128 return status; 2127 return status;
2129} 2128}
2130 2129
2131/** 2130/**
2132 * ocfs2_prep_new_orphaned_file() - Prepare the orphan dir to recieve a newly 2131 * ocfs2_prep_new_orphaned_file() - Prepare the orphan dir to receive a newly
2133 * allocated file. This is different from the typical 'add to orphan dir' 2132 * allocated file. This is different from the typical 'add to orphan dir'
2134 * operation in that the inode does not yet exist. This is a problem because 2133 * operation in that the inode does not yet exist. This is a problem because
2135 * the orphan dir stringifies the inode block number to come up with its 2134 * the orphan dir stringifies the inode block number to come up with its
@@ -2321,9 +2320,6 @@ leave:
2321 iput(orphan_dir); 2320 iput(orphan_dir);
2322 } 2321 }
2323 2322
2324 if (status == -ENOSPC)
2325 mlog(0, "Disk is full\n");
2326
2327 if ((status < 0) && inode) { 2323 if ((status < 0) && inode) {
2328 clear_nlink(inode); 2324 clear_nlink(inode);
2329 iput(inode); 2325 iput(inode);
@@ -2358,8 +2354,10 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
2358 struct buffer_head *di_bh = NULL; 2354 struct buffer_head *di_bh = NULL;
2359 struct ocfs2_dir_lookup_result lookup = { NULL, }; 2355 struct ocfs2_dir_lookup_result lookup = { NULL, };
2360 2356
2361 mlog_entry("(0x%p, 0x%p, %.*s')\n", dir, dentry, 2357 trace_ocfs2_mv_orphaned_inode_to_new(dir, dentry,
2362 dentry->d_name.len, dentry->d_name.name); 2358 dentry->d_name.len, dentry->d_name.name,
2359 (unsigned long long)OCFS2_I(dir)->ip_blkno,
2360 (unsigned long long)OCFS2_I(inode)->ip_blkno);
2363 2361
2364 status = ocfs2_inode_lock(dir, &parent_di_bh, 1); 2362 status = ocfs2_inode_lock(dir, &parent_di_bh, 1);
2365 if (status < 0) { 2363 if (status < 0) {
@@ -2476,7 +2474,8 @@ leave:
2476 2474
2477 ocfs2_free_dir_lookup_result(&lookup); 2475 ocfs2_free_dir_lookup_result(&lookup);
2478 2476
2479 mlog_exit(status); 2477 if (status)
2478 mlog_errno(status);
2480 2479
2481 return status; 2480 return status;
2482} 2481}
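
Single-value results such as the pointer logged by trace_ocfs2_lookup_ret() map directly onto the generic event classes declared in the new ocfs2_trace.h below. A sketch of the assumed pairing (the concrete DEFINE line for this event falls outside the reproduced portion of the header):

    /* In ocfs2_trace.h (assumed; reuses the generic pointer class): */
    DEFINE_OCFS2_POINTER_EVENT(ocfs2_lookup_ret);

    /* At the call site in ocfs2_lookup(), as in the hunk above: */
    trace_ocfs2_lookup_ret(ret);
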
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 51cd6898e7f1..409285854f64 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -147,6 +147,17 @@ struct ocfs2_lock_res_ops;
147 147
148typedef void (*ocfs2_lock_callback)(int status, unsigned long data); 148typedef void (*ocfs2_lock_callback)(int status, unsigned long data);
149 149
150#ifdef CONFIG_OCFS2_FS_STATS
151struct ocfs2_lock_stats {
152 u64 ls_total; /* Total wait in NSEC */
153 u32 ls_gets; /* Num acquires */
154 u32 ls_fail; /* Num failed acquires */
155
156 /* Storing max wait in usecs saves 24 bytes per inode */
157 u32 ls_max; /* Max wait in USEC */
158};
159#endif
160
150struct ocfs2_lock_res { 161struct ocfs2_lock_res {
151 void *l_priv; 162 void *l_priv;
152 struct ocfs2_lock_res_ops *l_ops; 163 struct ocfs2_lock_res_ops *l_ops;
@@ -182,15 +193,9 @@ struct ocfs2_lock_res {
182 struct list_head l_debug_list; 193 struct list_head l_debug_list;
183 194
184#ifdef CONFIG_OCFS2_FS_STATS 195#ifdef CONFIG_OCFS2_FS_STATS
185 unsigned long long l_lock_num_prmode; /* PR acquires */ 196 struct ocfs2_lock_stats l_lock_prmode; /* PR mode stats */
186 unsigned long long l_lock_num_exmode; /* EX acquires */ 197 u32 l_lock_refresh; /* Disk refreshes */
187 unsigned int l_lock_num_prmode_failed; /* Failed PR gets */ 198 struct ocfs2_lock_stats l_lock_exmode; /* EX mode stats */
188 unsigned int l_lock_num_exmode_failed; /* Failed EX gets */
189 unsigned long long l_lock_total_prmode; /* Tot wait for PR */
190 unsigned long long l_lock_total_exmode; /* Tot wait for EX */
191 unsigned int l_lock_max_prmode; /* Max wait for PR */
192 unsigned int l_lock_max_exmode; /* Max wait for EX */
193 unsigned int l_lock_refresh; /* Disk refreshes */
194#endif 199#endif
195#ifdef CONFIG_DEBUG_LOCK_ALLOC 200#ifdef CONFIG_DEBUG_LOCK_ALLOC
196 struct lockdep_map l_lockdep_map; 201 struct lockdep_map l_lockdep_map;
@@ -831,18 +836,18 @@ static inline unsigned int ocfs2_clusters_to_megabytes(struct super_block *sb,
831 836
832static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap) 837static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap)
833{ 838{
834 ext2_set_bit(bit, bitmap); 839 __test_and_set_bit_le(bit, bitmap);
835} 840}
836#define ocfs2_set_bit(bit, addr) _ocfs2_set_bit((bit), (unsigned long *)(addr)) 841#define ocfs2_set_bit(bit, addr) _ocfs2_set_bit((bit), (unsigned long *)(addr))
837 842
838static inline void _ocfs2_clear_bit(unsigned int bit, unsigned long *bitmap) 843static inline void _ocfs2_clear_bit(unsigned int bit, unsigned long *bitmap)
839{ 844{
840 ext2_clear_bit(bit, bitmap); 845 __test_and_clear_bit_le(bit, bitmap);
841} 846}
842#define ocfs2_clear_bit(bit, addr) _ocfs2_clear_bit((bit), (unsigned long *)(addr)) 847#define ocfs2_clear_bit(bit, addr) _ocfs2_clear_bit((bit), (unsigned long *)(addr))
843 848
844#define ocfs2_test_bit ext2_test_bit 849#define ocfs2_test_bit test_bit_le
845#define ocfs2_find_next_zero_bit ext2_find_next_zero_bit 850#define ocfs2_find_next_zero_bit find_next_zero_bit_le
846#define ocfs2_find_next_bit ext2_find_next_bit 851#define ocfs2_find_next_bit find_next_bit_le
847#endif /* OCFS2_H */ 852#endif /* OCFS2_H */
848 853
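
The bitop change above tracks a kernel-wide rename: the old ext2_*() helpers and the new *_le() helpers both address bits in little-endian byte order, which keeps ocfs2's on-disk bitmaps compatible across host endianness. A minimal user-space sketch of that addressing rule (illustrative only, not the kernel implementation):

    /* Bit n of a little-endian bitmap lives in byte n / 8, at
     * position n % 8 inside that byte, on any host endianness. */
    static inline int le_test_and_set_bit(unsigned int nr,
                                          unsigned char *bitmap)
    {
            unsigned char mask = 1U << (nr & 7);
            unsigned char *p = bitmap + (nr >> 3);
            int old = (*p & mask) != 0;

            *p |= mask;
            return old;
    }
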
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index bf2e7764920e..b68f87a83924 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -441,7 +441,7 @@ static unsigned char ocfs2_type_by_mode[S_IFMT >> S_SHIFT] = {
441struct ocfs2_block_check { 441struct ocfs2_block_check {
442/*00*/ __le32 bc_crc32e; /* 802.3 Ethernet II CRC32 */ 442/*00*/ __le32 bc_crc32e; /* 802.3 Ethernet II CRC32 */
443 __le16 bc_ecc; /* Single-error-correction parity vector. 443 __le16 bc_ecc; /* Single-error-correction parity vector.
444 This is a simple Hamming code dependant 444 This is a simple Hamming code dependent
445 on the blocksize. OCFS2's maximum 445 on the blocksize. OCFS2's maximum
446 blocksize, 4K, requires 16 parity bits, 446 blocksize, 4K, requires 16 parity bits,
447 so we fit in __le16. */ 447 so we fit in __le16. */
@@ -750,7 +750,7 @@ struct ocfs2_dinode {
750 after an unclean 750 after an unclean
751 shutdown */ 751 shutdown */
752 } journal1; 752 } journal1;
753 } id1; /* Inode type dependant 1 */ 753 } id1; /* Inode type dependent 1 */
754/*C0*/ union { 754/*C0*/ union {
755 struct ocfs2_super_block i_super; 755 struct ocfs2_super_block i_super;
756 struct ocfs2_local_alloc i_lab; 756 struct ocfs2_local_alloc i_lab;
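
The ocfs2_trace.h header added below follows the standard kernel tracepoint pattern: including it merely declares the events, and exactly one compilation unit in the module must define CREATE_TRACE_POINTS before the include to instantiate them. Which fs/ocfs2 file does so is not visible in this section, so the placement here is an assumption:

    /* In exactly one fs/ocfs2 .c file (assumed, not shown here): */
    #define CREATE_TRACE_POINTS
    #include "ocfs2_trace.h"
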
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h
new file mode 100644
index 000000000000..a1dae5bb54ac
--- /dev/null
+++ b/fs/ocfs2/ocfs2_trace.h
@@ -0,0 +1,2739 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM ocfs2
3
4#if !defined(_TRACE_OCFS2_H) || defined(TRACE_HEADER_MULTI_READ)
5#define _TRACE_OCFS2_H
6
7#include <linux/tracepoint.h>
8
9DECLARE_EVENT_CLASS(ocfs2__int,
10 TP_PROTO(int num),
11 TP_ARGS(num),
12 TP_STRUCT__entry(
13 __field(int, num)
14 ),
15 TP_fast_assign(
16 __entry->num = num;
17 ),
18 TP_printk("%d", __entry->num)
19);
20
21#define DEFINE_OCFS2_INT_EVENT(name) \
22DEFINE_EVENT(ocfs2__int, name, \
23 TP_PROTO(int num), \
24 TP_ARGS(num))
25
26DECLARE_EVENT_CLASS(ocfs2__uint,
27 TP_PROTO(unsigned int num),
28 TP_ARGS(num),
29 TP_STRUCT__entry(
30 __field( unsigned int, num )
31 ),
32 TP_fast_assign(
33 __entry->num = num;
34 ),
35 TP_printk("%u", __entry->num)
36);
37
38#define DEFINE_OCFS2_UINT_EVENT(name) \
39DEFINE_EVENT(ocfs2__uint, name, \
40 TP_PROTO(unsigned int num), \
41 TP_ARGS(num))
42
43DECLARE_EVENT_CLASS(ocfs2__ull,
44 TP_PROTO(unsigned long long blkno),
45 TP_ARGS(blkno),
46 TP_STRUCT__entry(
47 __field(unsigned long long, blkno)
48 ),
49 TP_fast_assign(
50 __entry->blkno = blkno;
51 ),
52 TP_printk("%llu", __entry->blkno)
53);
54
55#define DEFINE_OCFS2_ULL_EVENT(name) \
56DEFINE_EVENT(ocfs2__ull, name, \
57 TP_PROTO(unsigned long long num), \
58 TP_ARGS(num))
59
60DECLARE_EVENT_CLASS(ocfs2__pointer,
61 TP_PROTO(void *pointer),
62 TP_ARGS(pointer),
63 TP_STRUCT__entry(
64 __field(void *, pointer)
65 ),
66 TP_fast_assign(
67 __entry->pointer = pointer;
68 ),
69 TP_printk("%p", __entry->pointer)
70);
71
72#define DEFINE_OCFS2_POINTER_EVENT(name) \
73DEFINE_EVENT(ocfs2__pointer, name, \
74 TP_PROTO(void *pointer), \
75 TP_ARGS(pointer))
76
77DECLARE_EVENT_CLASS(ocfs2__string,
78 TP_PROTO(const char *name),
79 TP_ARGS(name),
80 TP_STRUCT__entry(
81 __string(name,name)
82 ),
83 TP_fast_assign(
84 __assign_str(name, name);
85 ),
86 TP_printk("%s", __get_str(name))
87);
88
89#define DEFINE_OCFS2_STRING_EVENT(name) \
90DEFINE_EVENT(ocfs2__string, name, \
91 TP_PROTO(const char *name), \
92 TP_ARGS(name))
93
94DECLARE_EVENT_CLASS(ocfs2__int_int,
95 TP_PROTO(int value1, int value2),
96 TP_ARGS(value1, value2),
97 TP_STRUCT__entry(
98 __field(int, value1)
99 __field(int, value2)
100 ),
101 TP_fast_assign(
102 __entry->value1 = value1;
103 __entry->value2 = value2;
104 ),
105 TP_printk("%d %d", __entry->value1, __entry->value2)
106);
107
108#define DEFINE_OCFS2_INT_INT_EVENT(name) \
109DEFINE_EVENT(ocfs2__int_int, name, \
110 TP_PROTO(int val1, int val2), \
111 TP_ARGS(val1, val2))
112
113DECLARE_EVENT_CLASS(ocfs2__uint_int,
114 TP_PROTO(unsigned int value1, int value2),
115 TP_ARGS(value1, value2),
116 TP_STRUCT__entry(
117 __field(unsigned int, value1)
118 __field(int, value2)
119 ),
120 TP_fast_assign(
121 __entry->value1 = value1;
122 __entry->value2 = value2;
123 ),
124 TP_printk("%u %d", __entry->value1, __entry->value2)
125);
126
127#define DEFINE_OCFS2_UINT_INT_EVENT(name) \
128DEFINE_EVENT(ocfs2__uint_int, name, \
129 TP_PROTO(unsigned int val1, int val2), \
130 TP_ARGS(val1, val2))
131
132DECLARE_EVENT_CLASS(ocfs2__uint_uint,
133 TP_PROTO(unsigned int value1, unsigned int value2),
134 TP_ARGS(value1, value2),
135 TP_STRUCT__entry(
136 __field(unsigned int, value1)
137 __field(unsigned int, value2)
138 ),
139 TP_fast_assign(
140 __entry->value1 = value1;
141 __entry->value2 = value2;
142 ),
143 TP_printk("%u %u", __entry->value1, __entry->value2)
144);
145
146#define DEFINE_OCFS2_UINT_UINT_EVENT(name) \
147DEFINE_EVENT(ocfs2__uint_uint, name, \
148 TP_PROTO(unsigned int val1, unsigned int val2), \
149 TP_ARGS(val1, val2))
150
151DECLARE_EVENT_CLASS(ocfs2__ull_uint,
152 TP_PROTO(unsigned long long value1, unsigned int value2),
153 TP_ARGS(value1, value2),
154 TP_STRUCT__entry(
155 __field(unsigned long long, value1)
156 __field(unsigned int, value2)
157 ),
158 TP_fast_assign(
159 __entry->value1 = value1;
160 __entry->value2 = value2;
161 ),
162 TP_printk("%llu %u", __entry->value1, __entry->value2)
163);
164
165#define DEFINE_OCFS2_ULL_UINT_EVENT(name) \
166DEFINE_EVENT(ocfs2__ull_uint, name, \
167 TP_PROTO(unsigned long long val1, unsigned int val2), \
168 TP_ARGS(val1, val2))
169
170DECLARE_EVENT_CLASS(ocfs2__ull_int,
171 TP_PROTO(unsigned long long value1, int value2),
172 TP_ARGS(value1, value2),
173 TP_STRUCT__entry(
174 __field(unsigned long long, value1)
175 __field(int, value2)
176 ),
177 TP_fast_assign(
178 __entry->value1 = value1;
179 __entry->value2 = value2;
180 ),
181 TP_printk("%llu %d", __entry->value1, __entry->value2)
182);
183
184#define DEFINE_OCFS2_ULL_INT_EVENT(name) \
185DEFINE_EVENT(ocfs2__ull_int, name, \
186 TP_PROTO(unsigned long long val1, int val2), \
187 TP_ARGS(val1, val2))
188
189DECLARE_EVENT_CLASS(ocfs2__ull_ull,
190 TP_PROTO(unsigned long long value1, unsigned long long value2),
191 TP_ARGS(value1, value2),
192 TP_STRUCT__entry(
193 __field(unsigned long long, value1)
194 __field(unsigned long long, value2)
195 ),
196 TP_fast_assign(
197 __entry->value1 = value1;
198 __entry->value2 = value2;
199 ),
200 TP_printk("%llu %llu", __entry->value1, __entry->value2)
201);
202
203#define DEFINE_OCFS2_ULL_ULL_EVENT(name) \
204DEFINE_EVENT(ocfs2__ull_ull, name, \
205 TP_PROTO(unsigned long long val1, unsigned long long val2), \
206 TP_ARGS(val1, val2))
207
208DECLARE_EVENT_CLASS(ocfs2__ull_ull_uint,
209 TP_PROTO(unsigned long long value1,
210 unsigned long long value2, unsigned int value3),
211 TP_ARGS(value1, value2, value3),
212 TP_STRUCT__entry(
213 __field(unsigned long long, value1)
214 __field(unsigned long long, value2)
215 __field(unsigned int, value3)
216 ),
217 TP_fast_assign(
218 __entry->value1 = value1;
219 __entry->value2 = value2;
220 __entry->value3 = value3;
221 ),
222 TP_printk("%llu %llu %u",
223 __entry->value1, __entry->value2, __entry->value3)
224);
225
226#define DEFINE_OCFS2_ULL_ULL_UINT_EVENT(name) \
227DEFINE_EVENT(ocfs2__ull_ull_uint, name, \
228 TP_PROTO(unsigned long long val1, \
229 unsigned long long val2, unsigned int val3), \
230 TP_ARGS(val1, val2, val3))
231
232DECLARE_EVENT_CLASS(ocfs2__ull_uint_uint,
233 TP_PROTO(unsigned long long value1,
234 unsigned int value2, unsigned int value3),
235 TP_ARGS(value1, value2, value3),
236 TP_STRUCT__entry(
237 __field(unsigned long long, value1)
238 __field(unsigned int, value2)
239 __field(unsigned int, value3)
240 ),
241 TP_fast_assign(
242 __entry->value1 = value1;
243 __entry->value2 = value2;
244 __entry->value3 = value3;
245 ),
246 TP_printk("%llu %u %u", __entry->value1,
247 __entry->value2, __entry->value3)
248);
249
250#define DEFINE_OCFS2_ULL_UINT_UINT_EVENT(name) \
251DEFINE_EVENT(ocfs2__ull_uint_uint, name, \
252 TP_PROTO(unsigned long long val1, \
253 unsigned int val2, unsigned int val3), \
254 TP_ARGS(val1, val2, val3))
255
256DECLARE_EVENT_CLASS(ocfs2__uint_uint_uint,
257 TP_PROTO(unsigned int value1, unsigned int value2,
258 unsigned int value3),
259 TP_ARGS(value1, value2, value3),
260 TP_STRUCT__entry(
261 __field( unsigned int, value1 )
262 __field( unsigned int, value2 )
263 __field( unsigned int, value3 )
264 ),
265 TP_fast_assign(
266 __entry->value1 = value1;
267 __entry->value2 = value2;
268 __entry->value3 = value3;
269 ),
270 TP_printk("%u %u %u", __entry->value1, __entry->value2, __entry->value3)
271);
272
273#define DEFINE_OCFS2_UINT_UINT_UINT_EVENT(name) \
274DEFINE_EVENT(ocfs2__uint_uint_uint, name, \
275 TP_PROTO(unsigned int value1, unsigned int value2, \
276 unsigned int value3), \
277 TP_ARGS(value1, value2, value3))
278
279DECLARE_EVENT_CLASS(ocfs2__ull_ull_ull,
280 TP_PROTO(unsigned long long value1,
281 unsigned long long value2, unsigned long long value3),
282 TP_ARGS(value1, value2, value3),
283 TP_STRUCT__entry(
284 __field(unsigned long long, value1)
285 __field(unsigned long long, value2)
286 __field(unsigned long long, value3)
287 ),
288 TP_fast_assign(
289 __entry->value1 = value1;
290 __entry->value2 = value2;
291 __entry->value3 = value3;
292 ),
293 TP_printk("%llu %llu %llu",
294 __entry->value1, __entry->value2, __entry->value3)
295);
296
297#define DEFINE_OCFS2_ULL_ULL_ULL_EVENT(name) \
298DEFINE_EVENT(ocfs2__ull_ull_ull, name, \
299 TP_PROTO(unsigned long long value1, unsigned long long value2, \
300 unsigned long long value3), \
301 TP_ARGS(value1, value2, value3))
302
303DECLARE_EVENT_CLASS(ocfs2__ull_int_int_int,
304 TP_PROTO(unsigned long long ull, int value1, int value2, int value3),
305 TP_ARGS(ull, value1, value2, value3),
306 TP_STRUCT__entry(
307 __field( unsigned long long, ull )
308 __field( int, value1 )
309 __field( int, value2 )
310 __field( int, value3 )
311 ),
312 TP_fast_assign(
313 __entry->ull = ull;
314 __entry->value1 = value1;
315 __entry->value2 = value2;
316 __entry->value3 = value3;
317 ),
318 TP_printk("%llu %d %d %d",
319 __entry->ull, __entry->value1,
320 __entry->value2, __entry->value3)
321);
322
323#define DEFINE_OCFS2_ULL_INT_INT_INT_EVENT(name) \
324DEFINE_EVENT(ocfs2__ull_int_int_int, name, \
325 TP_PROTO(unsigned long long ull, int value1, \
326 int value2, int value3), \
327 TP_ARGS(ull, value1, value2, value3))
328
329DECLARE_EVENT_CLASS(ocfs2__ull_uint_uint_uint,
330 TP_PROTO(unsigned long long ull, unsigned int value1,
331 unsigned int value2, unsigned int value3),
332 TP_ARGS(ull, value1, value2, value3),
333 TP_STRUCT__entry(
334 __field(unsigned long long, ull)
335 __field(unsigned int, value1)
336 __field(unsigned int, value2)
337 __field(unsigned int, value3)
338 ),
339 TP_fast_assign(
340 __entry->ull = ull;
341 __entry->value1 = value1;
342 __entry->value2 = value2;
343 __entry->value3 = value3;
344 ),
345 TP_printk("%llu %u %u %u",
346 __entry->ull, __entry->value1,
347 __entry->value2, __entry->value3)
348);
349
350#define DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(name) \
351DEFINE_EVENT(ocfs2__ull_uint_uint_uint, name, \
352 TP_PROTO(unsigned long long ull, unsigned int value1, \
353 unsigned int value2, unsigned int value3), \
354 TP_ARGS(ull, value1, value2, value3))
355
356DECLARE_EVENT_CLASS(ocfs2__ull_ull_uint_uint,
357 TP_PROTO(unsigned long long value1, unsigned long long value2,
358 unsigned int value3, unsigned int value4),
359 TP_ARGS(value1, value2, value3, value4),
360 TP_STRUCT__entry(
361 __field(unsigned long long, value1)
362 __field(unsigned long long, value2)
363 __field(unsigned int, value3)
364 __field(unsigned int, value4)
365 ),
366 TP_fast_assign(
367 __entry->value1 = value1;
368 __entry->value2 = value2;
369 __entry->value3 = value3;
370 __entry->value4 = value4;
371 ),
372 TP_printk("%llu %llu %u %u",
373 __entry->value1, __entry->value2,
374 __entry->value3, __entry->value4)
375);
376
377#define DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(name) \
378DEFINE_EVENT(ocfs2__ull_ull_uint_uint, name, \
379 TP_PROTO(unsigned long long ull, unsigned long long ull1, \
380 unsigned int value2, unsigned int value3), \
381 TP_ARGS(ull, ull1, value2, value3))
382
383/* Trace events for fs/ocfs2/alloc.c. */
384DECLARE_EVENT_CLASS(ocfs2__btree_ops,
385 TP_PROTO(unsigned long long owner,\
386 unsigned int value1, unsigned int value2),
387 TP_ARGS(owner, value1, value2),
388 TP_STRUCT__entry(
389 __field(unsigned long long, owner)
390 __field(unsigned int, value1)
391 __field(unsigned int, value2)
392 ),
393 TP_fast_assign(
394 __entry->owner = owner;
395 __entry->value1 = value1;
396 __entry->value2 = value2;
397 ),
398 TP_printk("%llu %u %u",
399 __entry->owner, __entry->value1, __entry->value2)
400);
401
402#define DEFINE_OCFS2_BTREE_EVENT(name) \
403DEFINE_EVENT(ocfs2__btree_ops, name, \
404 TP_PROTO(unsigned long long owner, \
405 unsigned int value1, unsigned int value2), \
406 TP_ARGS(owner, value1, value2))
407
408DEFINE_OCFS2_BTREE_EVENT(ocfs2_adjust_rightmost_branch);
409
410DEFINE_OCFS2_BTREE_EVENT(ocfs2_rotate_tree_right);
411
412DEFINE_OCFS2_BTREE_EVENT(ocfs2_append_rec_to_path);
413
414DEFINE_OCFS2_BTREE_EVENT(ocfs2_insert_extent_start);
415
416DEFINE_OCFS2_BTREE_EVENT(ocfs2_add_clusters_in_btree);
417
418DEFINE_OCFS2_INT_EVENT(ocfs2_num_free_extents);
419
420DEFINE_OCFS2_INT_EVENT(ocfs2_complete_edge_insert);
421
422TRACE_EVENT(ocfs2_grow_tree,
423 TP_PROTO(unsigned long long owner, int depth),
424 TP_ARGS(owner, depth),
425 TP_STRUCT__entry(
426 __field(unsigned long long, owner)
427 __field(int, depth)
428 ),
429 TP_fast_assign(
430 __entry->owner = owner;
431 __entry->depth = depth;
432 ),
433 TP_printk("%llu %d", __entry->owner, __entry->depth)
434);
435
436TRACE_EVENT(ocfs2_rotate_subtree,
437 TP_PROTO(int subtree_root, unsigned long long blkno,
438 int depth),
439 TP_ARGS(subtree_root, blkno, depth),
440 TP_STRUCT__entry(
441 __field(int, subtree_root)
442 __field(unsigned long long, blkno)
443 __field(int, depth)
444 ),
445 TP_fast_assign(
446 __entry->subtree_root = subtree_root;
447 __entry->blkno = blkno;
448 __entry->depth = depth;
449 ),
450 TP_printk("%d %llu %d", __entry->subtree_root,
451 __entry->blkno, __entry->depth)
452);
453
454TRACE_EVENT(ocfs2_insert_extent,
455 TP_PROTO(unsigned int ins_appending, unsigned int ins_contig,
456 int ins_contig_index, int free_records, int ins_tree_depth),
457 TP_ARGS(ins_appending, ins_contig, ins_contig_index, free_records,
458 ins_tree_depth),
459 TP_STRUCT__entry(
460 __field(unsigned int, ins_appending)
461 __field(unsigned int, ins_contig)
462 __field(int, ins_contig_index)
463 __field(int, free_records)
464 __field(int, ins_tree_depth)
465 ),
466 TP_fast_assign(
467 __entry->ins_appending = ins_appending;
468 __entry->ins_contig = ins_contig;
469 __entry->ins_contig_index = ins_contig_index;
470 __entry->free_records = free_records;
471 __entry->ins_tree_depth = ins_tree_depth;
472 ),
473 TP_printk("%u %u %d %d %d",
474 __entry->ins_appending, __entry->ins_contig,
475 __entry->ins_contig_index, __entry->free_records,
476 __entry->ins_tree_depth)
477);
478
479TRACE_EVENT(ocfs2_split_extent,
480 TP_PROTO(int split_index, unsigned int c_contig_type,
481 unsigned int c_has_empty_extent,
482 unsigned int c_split_covers_rec),
483 TP_ARGS(split_index, c_contig_type,
484 c_has_empty_extent, c_split_covers_rec),
485 TP_STRUCT__entry(
486 __field(int, split_index)
487 __field(unsigned int, c_contig_type)
488 __field(unsigned int, c_has_empty_extent)
489 __field(unsigned int, c_split_covers_rec)
490 ),
491 TP_fast_assign(
492 __entry->split_index = split_index;
493 __entry->c_contig_type = c_contig_type;
494 __entry->c_has_empty_extent = c_has_empty_extent;
495 __entry->c_split_covers_rec = c_split_covers_rec;
496 ),
497 TP_printk("%d %u %u %u", __entry->split_index, __entry->c_contig_type,
498 __entry->c_has_empty_extent, __entry->c_split_covers_rec)
499);
500
501TRACE_EVENT(ocfs2_remove_extent,
502 TP_PROTO(unsigned long long owner, unsigned int cpos,
503 unsigned int len, int index,
504 unsigned int e_cpos, unsigned int clusters),
505 TP_ARGS(owner, cpos, len, index, e_cpos, clusters),
506 TP_STRUCT__entry(
507 __field(unsigned long long, owner)
508 __field(unsigned int, cpos)
509 __field(unsigned int, len)
510 __field(int, index)
511 __field(unsigned int, e_cpos)
512 __field(unsigned int, clusters)
513 ),
514 TP_fast_assign(
515 __entry->owner = owner;
516 __entry->cpos = cpos;
517 __entry->len = len;
518 __entry->index = index;
519 __entry->e_cpos = e_cpos;
520 __entry->clusters = clusters;
521 ),
522 TP_printk("%llu %u %u %d %u %u",
523 __entry->owner, __entry->cpos, __entry->len, __entry->index,
524 __entry->e_cpos, __entry->clusters)
525);
526
527TRACE_EVENT(ocfs2_commit_truncate,
528 TP_PROTO(unsigned long long ino, unsigned int new_cpos,
529 unsigned int clusters, unsigned int depth),
530 TP_ARGS(ino, new_cpos, clusters, depth),
531 TP_STRUCT__entry(
532 __field(unsigned long long, ino)
533 __field(unsigned int, new_cpos)
534 __field(unsigned int, clusters)
535 __field(unsigned int, depth)
536 ),
537 TP_fast_assign(
538 __entry->ino = ino;
539 __entry->new_cpos = new_cpos;
540 __entry->clusters = clusters;
541 __entry->depth = depth;
542 ),
543 TP_printk("%llu %u %u %u",
544 __entry->ino, __entry->new_cpos,
545 __entry->clusters, __entry->depth)
546);
547
548TRACE_EVENT(ocfs2_validate_extent_block,
549 TP_PROTO(unsigned long long blkno),
550 TP_ARGS(blkno),
551 TP_STRUCT__entry(
552 __field(unsigned long long, blkno)
553 ),
554 TP_fast_assign(
555 __entry->blkno = blkno;
556 ),
557 TP_printk("%llu ", __entry->blkno)
558);
559
560TRACE_EVENT(ocfs2_rotate_leaf,
561 TP_PROTO(unsigned int insert_cpos, int insert_index,
562 int has_empty, int next_free,
563 unsigned int l_count),
564 TP_ARGS(insert_cpos, insert_index, has_empty,
565 next_free, l_count),
566 TP_STRUCT__entry(
567 __field(unsigned int, insert_cpos)
568 __field(int, insert_index)
569 __field(int, has_empty)
570 __field(int, next_free)
571 __field(unsigned int, l_count)
572 ),
573 TP_fast_assign(
574 __entry->insert_cpos = insert_cpos;
575 __entry->insert_index = insert_index;
576 __entry->has_empty = has_empty;
577 __entry->next_free = next_free;
578 __entry->l_count = l_count;
579 ),
580 TP_printk("%u %d %d %d %u", __entry->insert_cpos,
581 __entry->insert_index, __entry->has_empty,
582 __entry->next_free, __entry->l_count)
583);
584
585TRACE_EVENT(ocfs2_add_clusters_in_btree_ret,
586 TP_PROTO(int status, int reason, int err),
587 TP_ARGS(status, reason, err),
588 TP_STRUCT__entry(
589 __field(int, status)
590 __field(int, reason)
591 __field(int, err)
592 ),
593 TP_fast_assign(
594 __entry->status = status;
595 __entry->reason = reason;
596 __entry->err = err;
597 ),
598 TP_printk("%d %d %d", __entry->status,
599 __entry->reason, __entry->err)
600);
601
602TRACE_EVENT(ocfs2_mark_extent_written,
603 TP_PROTO(unsigned long long owner, unsigned int cpos,
604 unsigned int len, unsigned int phys),
605 TP_ARGS(owner, cpos, len, phys),
606 TP_STRUCT__entry(
607 __field(unsigned long long, owner)
608 __field(unsigned int, cpos)
609 __field(unsigned int, len)
610 __field(unsigned int, phys)
611 ),
612 TP_fast_assign(
613 __entry->owner = owner;
614 __entry->cpos = cpos;
615 __entry->len = len;
616 __entry->phys = phys;
617 ),
618 TP_printk("%llu %u %u %u",
619 __entry->owner, __entry->cpos,
620 __entry->len, __entry->phys)
621);
622
623DECLARE_EVENT_CLASS(ocfs2__truncate_log_ops,
624 TP_PROTO(unsigned long long blkno, int index,
625 unsigned int start, unsigned int num),
626 TP_ARGS(blkno, index, start, num),
627 TP_STRUCT__entry(
628 __field(unsigned long long, blkno)
629 __field(int, index)
630 __field(unsigned int, start)
631 __field(unsigned int, num)
632 ),
633 TP_fast_assign(
634 __entry->blkno = blkno;
635 __entry->index = index;
636 __entry->start = start;
637 __entry->num = num;
638 ),
639 TP_printk("%llu %d %u %u",
640 __entry->blkno, __entry->index,
641 __entry->start, __entry->num)
642);
643
644#define DEFINE_OCFS2_TRUNCATE_LOG_OPS_EVENT(name) \
645DEFINE_EVENT(ocfs2__truncate_log_ops, name, \
646 TP_PROTO(unsigned long long blkno, int index, \
647 unsigned int start, unsigned int num), \
648 TP_ARGS(blkno, index, start, num))
649
650DEFINE_OCFS2_TRUNCATE_LOG_OPS_EVENT(ocfs2_truncate_log_append);
651
652DEFINE_OCFS2_TRUNCATE_LOG_OPS_EVENT(ocfs2_replay_truncate_records);
653
654DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_flush_truncate_log);
655
656DEFINE_OCFS2_INT_EVENT(ocfs2_begin_truncate_log_recovery);
657
658DEFINE_OCFS2_INT_EVENT(ocfs2_truncate_log_recovery_num);
659
660DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_complete_truncate_log_recovery);
661
662DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_free_cached_blocks);
663
664DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_cache_cluster_dealloc);
665
666DEFINE_OCFS2_INT_INT_EVENT(ocfs2_run_deallocs);
667
668TRACE_EVENT(ocfs2_cache_block_dealloc,
669 TP_PROTO(int type, int slot, unsigned long long suballoc,
670 unsigned long long blkno, unsigned int bit),
671 TP_ARGS(type, slot, suballoc, blkno, bit),
672 TP_STRUCT__entry(
673 __field(int, type)
674 __field(int, slot)
675 __field(unsigned long long, suballoc)
676 __field(unsigned long long, blkno)
677 __field(unsigned int, bit)
678 ),
679 TP_fast_assign(
680 __entry->type = type;
681 __entry->slot = slot;
682 __entry->suballoc = suballoc;
683 __entry->blkno = blkno;
684 __entry->bit = bit;
685 ),
686 TP_printk("%d %d %llu %llu %u",
687 __entry->type, __entry->slot, __entry->suballoc,
688 __entry->blkno, __entry->bit)
689);
690
691/* End of trace events for fs/ocfs2/alloc.c. */
692
693/* Trace events for fs/ocfs2/localalloc.c. */
694
695DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_la_set_sizes);
696
697DEFINE_OCFS2_ULL_INT_INT_INT_EVENT(ocfs2_alloc_should_use_local);
698
699DEFINE_OCFS2_INT_EVENT(ocfs2_load_local_alloc);
700
701DEFINE_OCFS2_INT_EVENT(ocfs2_begin_local_alloc_recovery);
702
703DEFINE_OCFS2_ULL_INT_INT_INT_EVENT(ocfs2_reserve_local_alloc_bits);
704
705DEFINE_OCFS2_UINT_EVENT(ocfs2_local_alloc_count_bits);
706
707DEFINE_OCFS2_INT_INT_EVENT(ocfs2_local_alloc_find_clear_bits_search_bitmap);
708
709DEFINE_OCFS2_ULL_INT_INT_INT_EVENT(ocfs2_local_alloc_find_clear_bits);
710
711DEFINE_OCFS2_INT_INT_EVENT(ocfs2_sync_local_to_main);
712
713TRACE_EVENT(ocfs2_sync_local_to_main_free,
714 TP_PROTO(int count, int bit, unsigned long long start_blk,
715 unsigned long long blkno),
716 TP_ARGS(count, bit, start_blk, blkno),
717 TP_STRUCT__entry(
718 __field(int, count)
719 __field(int, bit)
720 __field(unsigned long long, start_blk)
721 __field(unsigned long long, blkno)
722 ),
723 TP_fast_assign(
724 __entry->count = count;
725 __entry->bit = bit;
726 __entry->start_blk = start_blk;
727 __entry->blkno = blkno;
728 ),
729 TP_printk("%d %d %llu %llu",
730 __entry->count, __entry->bit, __entry->start_blk,
731 __entry->blkno)
732);
733
734DEFINE_OCFS2_INT_INT_EVENT(ocfs2_local_alloc_new_window);
735
736DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_local_alloc_new_window_result);
737
738/* End of trace events for fs/ocfs2/localalloc.c. */
739
740/* Trace events for fs/ocfs2/resize.c. */
741
742DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_update_last_group_and_inode);
743
744DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_group_extend);
745
746DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_group_add);
747
748/* End of trace events for fs/ocfs2/resize.c. */
749
750/* Trace events for fs/ocfs2/suballoc.c. */
751
752DEFINE_OCFS2_ULL_EVENT(ocfs2_validate_group_descriptor);
753
754DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_block_group_alloc_contig);
755
756DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_block_group_alloc_discontig);
757
758DEFINE_OCFS2_ULL_EVENT(ocfs2_block_group_alloc);
759
760DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_reserve_suballoc_bits_nospc);
761
762DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_reserve_suballoc_bits_no_new_group);
763
764DEFINE_OCFS2_ULL_EVENT(ocfs2_reserve_new_inode_new_group);
765
766DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_block_group_set_bits);
767
768TRACE_EVENT(ocfs2_relink_block_group,
769 TP_PROTO(unsigned long long i_blkno, unsigned int chain,
770 unsigned long long bg_blkno,
771 unsigned long long prev_blkno),
772 TP_ARGS(i_blkno, chain, bg_blkno, prev_blkno),
773 TP_STRUCT__entry(
774 __field(unsigned long long, i_blkno)
775 __field(unsigned int, chain)
776 __field(unsigned long long, bg_blkno)
777 __field(unsigned long long, prev_blkno)
778 ),
779 TP_fast_assign(
780 __entry->i_blkno = i_blkno;
781 __entry->chain = chain;
782 __entry->bg_blkno = bg_blkno;
783 __entry->prev_blkno = prev_blkno;
784 ),
785 TP_printk("%llu %u %llu %llu",
786 __entry->i_blkno, __entry->chain, __entry->bg_blkno,
787 __entry->prev_blkno)
788);
789
790DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_cluster_group_search_wrong_max_bits);
791
792DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_cluster_group_search_max_block);
793
794DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_block_group_search_max_block);
795
796DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_search_chain_begin);
797
798DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_search_chain_succ);
799
800DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_search_chain_end);
801
802DEFINE_OCFS2_UINT_EVENT(ocfs2_claim_suballoc_bits);
803
804DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_claim_new_inode_at_loc);
805
806DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_block_group_clear_bits);
807
808TRACE_EVENT(ocfs2_free_suballoc_bits,
809 TP_PROTO(unsigned long long inode, unsigned long long group,
810 unsigned int start_bit, unsigned int count),
811 TP_ARGS(inode, group, start_bit, count),
812 TP_STRUCT__entry(
813 __field(unsigned long long, inode)
814 __field(unsigned long long, group)
815 __field(unsigned int, start_bit)
816 __field(unsigned int, count)
817 ),
818 TP_fast_assign(
819 __entry->inode = inode;
820 __entry->group = group;
821 __entry->start_bit = start_bit;
822 __entry->count = count;
823 ),
824 TP_printk("%llu %llu %u %u", __entry->inode, __entry->group,
825 __entry->start_bit, __entry->count)
826);
827
828TRACE_EVENT(ocfs2_free_clusters,
829 TP_PROTO(unsigned long long bg_blkno, unsigned long long start_blk,
830 unsigned int start_bit, unsigned int count),
831 TP_ARGS(bg_blkno, start_blk, start_bit, count),
832 TP_STRUCT__entry(
833 __field(unsigned long long, bg_blkno)
834 __field(unsigned long long, start_blk)
835 __field(unsigned int, start_bit)
836 __field(unsigned int, count)
837 ),
838 TP_fast_assign(
839 __entry->bg_blkno = bg_blkno;
840 __entry->start_blk = start_blk;
841 __entry->start_bit = start_bit;
842 __entry->count = count;
843 ),
844 TP_printk("%llu %llu %u %u", __entry->bg_blkno, __entry->start_blk,
845 __entry->start_bit, __entry->count)
846);
847
848DEFINE_OCFS2_ULL_EVENT(ocfs2_get_suballoc_slot_bit);
849
850DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_test_suballoc_bit);
851
852DEFINE_OCFS2_ULL_EVENT(ocfs2_test_inode_bit);
853
854/* End of trace events for fs/ocfs2/suballoc.c. */
855
856/* Trace events for fs/ocfs2/refcounttree.c. */
857
858DEFINE_OCFS2_ULL_EVENT(ocfs2_validate_refcount_block);
859
860DEFINE_OCFS2_ULL_EVENT(ocfs2_purge_refcount_trees);
861
862DEFINE_OCFS2_ULL_EVENT(ocfs2_create_refcount_tree);
863
864DEFINE_OCFS2_ULL_EVENT(ocfs2_create_refcount_tree_blkno);
865
866DEFINE_OCFS2_ULL_INT_INT_INT_EVENT(ocfs2_change_refcount_rec);
867
868DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_expand_inline_ref_root);
869
870DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_divide_leaf_refcount_block);
871
872DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_new_leaf_refcount_block);
873
874DECLARE_EVENT_CLASS(ocfs2__refcount_tree_ops,
875 TP_PROTO(unsigned long long blkno, int index,
876 unsigned long long cpos,
877 unsigned int clusters, unsigned int refcount),
878 TP_ARGS(blkno, index, cpos, clusters, refcount),
879 TP_STRUCT__entry(
880 __field(unsigned long long, blkno)
881 __field(int, index)
882 __field(unsigned long long, cpos)
883 __field(unsigned int, clusters)
884 __field(unsigned int, refcount)
885 ),
886 TP_fast_assign(
887 __entry->blkno = blkno;
888 __entry->index = index;
889 __entry->cpos = cpos;
890 __entry->clusters = clusters;
891 __entry->refcount = refcount;
892 ),
893 TP_printk("%llu %d %llu %u %u", __entry->blkno, __entry->index,
894 __entry->cpos, __entry->clusters, __entry->refcount)
895);
896
897#define DEFINE_OCFS2_REFCOUNT_TREE_OPS_EVENT(name) \
898DEFINE_EVENT(ocfs2__refcount_tree_ops, name, \
899 TP_PROTO(unsigned long long blkno, int index, \
900 unsigned long long cpos, \
901 unsigned int count, unsigned int refcount), \
902 TP_ARGS(blkno, index, cpos, count, refcount))
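
/*
 * Class/instance sketch: DECLARE_EVENT_CLASS() above defines the record
 * layout and format string once; each DEFINE_EVENT() stamped out by the
 * wrapper macro reuses them, which keeps the generated code small.  The
 * instance below is assumed to be emitted from fs/ocfs2/refcounttree.c
 * roughly as:
 *
 *	trace_ocfs2_insert_refcount_rec(blkno, index, cpos,
 *					clusters, refcount);
 */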
903
904DEFINE_OCFS2_REFCOUNT_TREE_OPS_EVENT(ocfs2_insert_refcount_rec);
905
906TRACE_EVENT(ocfs2_split_refcount_rec,
907 TP_PROTO(unsigned long long cpos,
908 unsigned int clusters, unsigned int refcount,
909 unsigned long long split_cpos,
910 unsigned int split_clusters, unsigned int split_refcount),
911 TP_ARGS(cpos, clusters, refcount,
912 split_cpos, split_clusters, split_refcount),
913 TP_STRUCT__entry(
914 __field(unsigned long long, cpos)
915 __field(unsigned int, clusters)
916 __field(unsigned int, refcount)
917 __field(unsigned long long, split_cpos)
918 __field(unsigned int, split_clusters)
919 __field(unsigned int, split_refcount)
920 ),
921 TP_fast_assign(
922 __entry->cpos = cpos;
923 __entry->clusters = clusters;
924 __entry->refcount = refcount;
925 __entry->split_cpos = split_cpos;
926 __entry->split_clusters = split_clusters;
927 __entry->split_refcount = split_refcount;
928 ),
929 TP_printk("%llu %u %u %llu %u %u",
930 __entry->cpos, __entry->clusters, __entry->refcount,
931 __entry->split_cpos, __entry->split_clusters,
932 __entry->split_refcount)
933);
934
935DEFINE_OCFS2_REFCOUNT_TREE_OPS_EVENT(ocfs2_split_refcount_rec_insert);
936
937DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_increase_refcount_begin);
938
939DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_increase_refcount_change);
940
941DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_increase_refcount_insert);
942
943DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_increase_refcount_split);
944
945DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_remove_refcount_extent);
946
947DEFINE_OCFS2_ULL_EVENT(ocfs2_restore_refcount_block);
948
949DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_decrease_refcount_rec);
950
951TRACE_EVENT(ocfs2_decrease_refcount,
952 TP_PROTO(unsigned long long owner,
953 unsigned long long cpos,
954 unsigned int len, int delete),
955 TP_ARGS(owner, cpos, len, delete),
956 TP_STRUCT__entry(
957 __field(unsigned long long, owner)
958 __field(unsigned long long, cpos)
959 __field(unsigned int, len)
960 __field(int, delete)
961 ),
962 TP_fast_assign(
963 __entry->owner = owner;
964 __entry->cpos = cpos;
965 __entry->len = len;
966 __entry->delete = delete;
967 ),
968 TP_printk("%llu %llu %u %d",
969 __entry->owner, __entry->cpos, __entry->len, __entry->delete)
970);
971
972DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_mark_extent_refcounted);
973
974DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_calc_refcount_meta_credits);
975
976TRACE_EVENT(ocfs2_calc_refcount_meta_credits_iterate,
977 TP_PROTO(int recs_add, unsigned long long cpos,
978 unsigned int clusters, unsigned long long r_cpos,
979 unsigned int r_clusters, unsigned int refcount, int index),
980 TP_ARGS(recs_add, cpos, clusters, r_cpos, r_clusters, refcount, index),
981 TP_STRUCT__entry(
982 __field(int, recs_add)
983 __field(unsigned long long, cpos)
984 __field(unsigned int, clusters)
985 __field(unsigned long long, r_cpos)
986 __field(unsigned int, r_clusters)
987 __field(unsigned int, refcount)
988 __field(int, index)
989 ),
990 TP_fast_assign(
991 __entry->recs_add = recs_add;
992 __entry->cpos = cpos;
993 __entry->clusters = clusters;
994 __entry->r_cpos = r_cpos;
995 __entry->r_clusters = r_clusters;
996 __entry->refcount = refcount;
997 __entry->index = index;
998 ),
999 TP_printk("%d %llu %u %llu %u %u %d",
1000 __entry->recs_add, __entry->cpos, __entry->clusters,
1001 __entry->r_cpos, __entry->r_clusters,
1002 __entry->refcount, __entry->index)
1003);
1004
1005DEFINE_OCFS2_INT_INT_EVENT(ocfs2_add_refcount_flag);
1006
1007DEFINE_OCFS2_INT_INT_EVENT(ocfs2_prepare_refcount_change_for_del);
1008
1009DEFINE_OCFS2_INT_INT_EVENT(ocfs2_lock_refcount_allocators);
1010
1011DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_duplicate_clusters_by_page);
1012
1013DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_duplicate_clusters_by_jbd);
1014
1015TRACE_EVENT(ocfs2_clear_ext_refcount,
1016 TP_PROTO(unsigned long long ino, unsigned int cpos,
1017 unsigned int len, unsigned int p_cluster,
1018 unsigned int ext_flags),
1019 TP_ARGS(ino, cpos, len, p_cluster, ext_flags),
1020 TP_STRUCT__entry(
1021 __field(unsigned long long, ino)
1022 __field(unsigned int, cpos)
1023 __field(unsigned int, len)
1024 __field(unsigned int, p_cluster)
1025 __field(unsigned int, ext_flags)
1026 ),
1027 TP_fast_assign(
1028 __entry->ino = ino;
1029 __entry->cpos = cpos;
1030 __entry->len = len;
1031 __entry->p_cluster = p_cluster;
1032 __entry->ext_flags = ext_flags;
1033 ),
1034 TP_printk("%llu %u %u %u %u",
1035 __entry->ino, __entry->cpos, __entry->len,
1036 __entry->p_cluster, __entry->ext_flags)
1037);
1038
1039TRACE_EVENT(ocfs2_replace_clusters,
1040 TP_PROTO(unsigned long long ino, unsigned int cpos,
1041 unsigned int old, unsigned int new, unsigned int len,
1042 unsigned int ext_flags),
1043 TP_ARGS(ino, cpos, old, new, len, ext_flags),
1044 TP_STRUCT__entry(
1045 __field(unsigned long long, ino)
1046 __field(unsigned int, cpos)
1047 __field(unsigned int, old)
1048 __field(unsigned int, new)
1049 __field(unsigned int, len)
1050 __field(unsigned int, ext_flags)
1051 ),
1052 TP_fast_assign(
1053 __entry->ino = ino;
1054 __entry->cpos = cpos;
1055 __entry->old = old;
1056 __entry->new = new;
1057 __entry->len = len;
1058 __entry->ext_flags = ext_flags;
1059 ),
1060 TP_printk("%llu %u %u %u %u %u",
1061 __entry->ino, __entry->cpos, __entry->old, __entry->new,
1062 __entry->len, __entry->ext_flags)
1063);
1064
1065DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_make_clusters_writable);
1066
1067TRACE_EVENT(ocfs2_refcount_cow_hunk,
1068 TP_PROTO(unsigned long long ino, unsigned int cpos,
1069 unsigned int write_len, unsigned int max_cpos,
1070 unsigned int cow_start, unsigned int cow_len),
1071 TP_ARGS(ino, cpos, write_len, max_cpos, cow_start, cow_len),
1072 TP_STRUCT__entry(
1073 __field(unsigned long long, ino)
1074 __field(unsigned int, cpos)
1075 __field(unsigned int, write_len)
1076 __field(unsigned int, max_cpos)
1077 __field(unsigned int, cow_start)
1078 __field(unsigned int, cow_len)
1079 ),
1080 TP_fast_assign(
1081 __entry->ino = ino;
1082 __entry->cpos = cpos;
1083 __entry->write_len = write_len;
1084 __entry->max_cpos = max_cpos;
1085 __entry->cow_start = cow_start;
1086 __entry->cow_len = cow_len;
1087 ),
1088 TP_printk("%llu %u %u %u %u %u",
1089 __entry->ino, __entry->cpos, __entry->write_len,
1090 __entry->max_cpos, __entry->cow_start, __entry->cow_len)
1091);
1092
1093/* End of trace events for fs/ocfs2/refcounttree.c. */
1094
1095/* Trace events for fs/ocfs2/aops.c. */
1096
1097DECLARE_EVENT_CLASS(ocfs2__get_block,
1098 TP_PROTO(unsigned long long ino, unsigned long long iblock,
1099 void *bh_result, int create),
1100 TP_ARGS(ino, iblock, bh_result, create),
1101 TP_STRUCT__entry(
1102 __field(unsigned long long, ino)
1103 __field(unsigned long long, iblock)
1104 __field(void *, bh_result)
1105 __field(int, create)
1106 ),
1107 TP_fast_assign(
1108 __entry->ino = ino;
1109 __entry->iblock = iblock;
1110 __entry->bh_result = bh_result;
1111 __entry->create = create;
1112 ),
1113 TP_printk("%llu %llu %p %d",
1114 __entry->ino, __entry->iblock,
1115 __entry->bh_result, __entry->create)
1116);
1117
1118#define DEFINE_OCFS2_GET_BLOCK_EVENT(name) \
1119DEFINE_EVENT(ocfs2__get_block, name, \
1120 TP_PROTO(unsigned long long ino, unsigned long long iblock, \
1121 void *bh_result, int create), \
1122 TP_ARGS(ino, iblock, bh_result, create))
1123
1124DEFINE_OCFS2_GET_BLOCK_EVENT(ocfs2_symlink_get_block);
1125
1126DEFINE_OCFS2_GET_BLOCK_EVENT(ocfs2_get_block);
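
/*
 * Call-site sketch for the two get_block events above, inferred from
 * the TP_PROTO (the real callers in fs/ocfs2/aops.c may differ):
 *
 *	trace_ocfs2_get_block(
 *		(unsigned long long)OCFS2_I(inode)->ip_blkno,
 *		(unsigned long long)iblock, bh_result, create);
 */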
1127
1128DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_get_block_end);
1129
1130DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_readpage);
1131
1132DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_writepage);
1133
1134DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_bmap);
1135
1136TRACE_EVENT(ocfs2_try_to_write_inline_data,
1137 TP_PROTO(unsigned long long ino, unsigned int len,
1138 unsigned long long pos, unsigned int flags),
1139 TP_ARGS(ino, len, pos, flags),
1140 TP_STRUCT__entry(
1141 __field(unsigned long long, ino)
1142 __field(unsigned int, len)
1143 __field(unsigned long long, pos)
1144 __field(unsigned int, flags)
1145 ),
1146 TP_fast_assign(
1147 __entry->ino = ino;
1148 __entry->len = len;
1149 __entry->pos = pos;
1150 __entry->flags = flags;
1151 ),
1152 TP_printk("%llu %u %llu 0x%x",
1153 __entry->ino, __entry->len, __entry->pos, __entry->flags)
1154);
1155
1156TRACE_EVENT(ocfs2_write_begin_nolock,
1157 TP_PROTO(unsigned long long ino,
1158 long long i_size, unsigned int i_clusters,
1159 unsigned long long pos, unsigned int len,
1160 unsigned int flags, void *page,
1161 unsigned int clusters, unsigned int extents_to_split),
1162 TP_ARGS(ino, i_size, i_clusters, pos, len, flags,
1163 page, clusters, extents_to_split),
1164 TP_STRUCT__entry(
1165 __field(unsigned long long, ino)
1166 __field(long long, i_size)
1167 __field(unsigned int, i_clusters)
1168 __field(unsigned long long, pos)
1169 __field(unsigned int, len)
1170 __field(unsigned int, flags)
1171 __field(void *, page)
1172 __field(unsigned int, clusters)
1173 __field(unsigned int, extents_to_split)
1174 ),
1175 TP_fast_assign(
1176 __entry->ino = ino;
1177 __entry->i_size = i_size;
1178 __entry->i_clusters = i_clusters;
1179 __entry->pos = pos;
1180 __entry->len = len;
1181 __entry->flags = flags;
1182 __entry->page = page;
1183 __entry->clusters = clusters;
1184 __entry->extents_to_split = extents_to_split;
1185 ),
1186 TP_printk("%llu %lld %u %llu %u %u %p %u %u",
1187 __entry->ino, __entry->i_size, __entry->i_clusters,
1188 __entry->pos, __entry->len,
1189 __entry->flags, __entry->page, __entry->clusters,
1190 __entry->extents_to_split)
1191);
1192
1193TRACE_EVENT(ocfs2_write_end_inline,
1194 TP_PROTO(unsigned long long ino,
1195 unsigned long long pos, unsigned int copied,
1196 unsigned int id_count, unsigned int features),
1197 TP_ARGS(ino, pos, copied, id_count, features),
1198 TP_STRUCT__entry(
1199 __field(unsigned long long, ino)
1200 __field(unsigned long long, pos)
1201 __field(unsigned int, copied)
1202 __field(unsigned int, id_count)
1203 __field(unsigned int, features)
1204 ),
1205 TP_fast_assign(
1206 __entry->ino = ino;
1207 __entry->pos = pos;
1208 __entry->copied = copied;
1209 __entry->id_count = id_count;
1210 __entry->features = features;
1211 ),
1212 TP_printk("%llu %llu %u %u %u",
1213 __entry->ino, __entry->pos, __entry->copied,
1214 __entry->id_count, __entry->features)
1215);
1216
1217/* End of trace events for fs/ocfs2/aops.c. */
1218
1219/* Trace events for fs/ocfs2/mmap.c. */
1220
1221TRACE_EVENT(ocfs2_fault,
1222 TP_PROTO(unsigned long long ino,
1223 void *area, void *page, unsigned long pgoff),
1224 TP_ARGS(ino, area, page, pgoff),
1225 TP_STRUCT__entry(
1226 __field(unsigned long long, ino)
1227 __field(void *, area)
1228 __field(void *, page)
1229 __field(unsigned long, pgoff)
1230 ),
1231 TP_fast_assign(
1232 __entry->ino = ino;
1233 __entry->area = area;
1234 __entry->page = page;
1235 __entry->pgoff = pgoff;
1236 ),
1237 TP_printk("%llu %p %p %lu",
1238 __entry->ino, __entry->area, __entry->page, __entry->pgoff)
1239);
1240
1241/* End of trace events for fs/ocfs2/mmap.c. */
1242
1243/* Trace events for fs/ocfs2/file.c. */
1244
1245DECLARE_EVENT_CLASS(ocfs2__file_ops,
1246 TP_PROTO(void *inode, void *file, void *dentry,
1247 unsigned long long ino,
1248 unsigned int d_len, const unsigned char *d_name,
1249 unsigned long long para),
1250 TP_ARGS(inode, file, dentry, ino, d_len, d_name, para),
1251 TP_STRUCT__entry(
1252 __field(void *, inode)
1253 __field(void *, file)
1254 __field(void *, dentry)
1255 __field(unsigned long long, ino)
1256 __field(unsigned int, d_len)
1257 __string(d_name, d_name)
1258 __field(unsigned long long, para)
1259 ),
1260 TP_fast_assign(
1261 __entry->inode = inode;
1262 __entry->file = file;
1263 __entry->dentry = dentry;
1264 __entry->ino = ino;
1265 __entry->d_len = d_len;
1266 __assign_str(d_name, d_name);
1267 __entry->para = para;
1268 ),
1269 TP_printk("%p %p %p %llu %llu %.*s", __entry->inode, __entry->file,
1270 __entry->dentry, __entry->ino, __entry->para,
1271 __entry->d_len, __get_str(d_name))
1272);
1273
1274#define DEFINE_OCFS2_FILE_OPS(name) \
1275DEFINE_EVENT(ocfs2__file_ops, name, \
1276TP_PROTO(void *inode, void *file, void *dentry, \
1277 unsigned long long ino, \
1278 unsigned int d_len, const unsigned char *d_name, \
1279 unsigned long long mode), \
1280 TP_ARGS(inode, file, dentry, ino, d_len, d_name, mode))
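
/*
 * String-capture note: unlike the fixed-size __field() entries, the
 * __string(d_name, d_name) in the class above reserves only as much
 * ring-buffer space as the name needs at trace time, __assign_str()
 * copies it in, and TP_printk() renders it with "%.*s" so that d_len
 * bounds the output.  A hypothetical emission (blkno and the final
 * payload argument are assumptions):
 *
 *	trace_ocfs2_file_open(inode, file, dentry,
 *			      (unsigned long long)blkno,
 *			      dentry->d_name.len, dentry->d_name.name,
 *			      0ULL);
 */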
1281
1282DEFINE_OCFS2_FILE_OPS(ocfs2_file_open);
1283
1284DEFINE_OCFS2_FILE_OPS(ocfs2_file_release);
1285
1286DEFINE_OCFS2_FILE_OPS(ocfs2_sync_file);
1287
1288DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_write);
1289
1290DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_write);
1291
1292DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_read);
1293
1294DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_read);
1295
1296DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_truncate_file);
1297
1298DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_truncate_file_error);
1299
1300TRACE_EVENT(ocfs2_extend_allocation,
1301 TP_PROTO(unsigned long long ip_blkno, unsigned long long size,
1302 unsigned int clusters, unsigned int clusters_to_add,
1303 int why, int restart_func),
1304 TP_ARGS(ip_blkno, size, clusters, clusters_to_add, why, restart_func),
1305 TP_STRUCT__entry(
1306 __field(unsigned long long, ip_blkno)
1307 __field(unsigned long long, size)
1308 __field(unsigned int, clusters)
1309 __field(unsigned int, clusters_to_add)
1310 __field(int, why)
1311 __field(int, restart_func)
1312 ),
1313 TP_fast_assign(
1314 __entry->ip_blkno = ip_blkno;
1315 __entry->size = size;
1316 __entry->clusters = clusters;
1317 __entry->clusters_to_add = clusters_to_add;
1318 __entry->why = why;
1319 __entry->restart_func = restart_func;
1320 ),
1321 TP_printk("%llu %llu %u %u %d %d",
1322 __entry->ip_blkno, __entry->size, __entry->clusters,
1323 __entry->clusters_to_add, __entry->why, __entry->restart_func)
1324);
1325
1326TRACE_EVENT(ocfs2_extend_allocation_end,
1327 TP_PROTO(unsigned long long ino,
1328 unsigned int di_clusters, unsigned long long di_size,
1329 unsigned int ip_clusters, unsigned long long i_size),
1330 TP_ARGS(ino, di_clusters, di_size, ip_clusters, i_size),
1331 TP_STRUCT__entry(
1332 __field(unsigned long long, ino)
1333 __field(unsigned int, di_clusters)
1334 __field(unsigned long long, di_size)
1335 __field(unsigned int, ip_clusters)
1336 __field(unsigned long long, i_size)
1337 ),
1338 TP_fast_assign(
1339 __entry->ino = ino;
1340 __entry->di_clusters = di_clusters;
1341 __entry->di_size = di_size;
1342 __entry->ip_clusters = ip_clusters;
1343 __entry->i_size = i_size;
1344 ),
1345 TP_printk("%llu %u %llu %u %llu", __entry->ino, __entry->di_clusters,
1346 __entry->di_size, __entry->ip_clusters, __entry->i_size)
1347);
1348
1349TRACE_EVENT(ocfs2_write_zero_page,
1350 TP_PROTO(unsigned long long ino,
1351 unsigned long long abs_from, unsigned long long abs_to,
1352 unsigned long index, unsigned int zero_from,
1353 unsigned int zero_to),
1354 TP_ARGS(ino, abs_from, abs_to, index, zero_from, zero_to),
1355 TP_STRUCT__entry(
1356 __field(unsigned long long, ino)
1357 __field(unsigned long long, abs_from)
1358 __field(unsigned long long, abs_to)
1359 __field(unsigned long, index)
1360 __field(unsigned int, zero_from)
1361 __field(unsigned int, zero_to)
1362 ),
1363 TP_fast_assign(
1364 __entry->ino = ino;
1365 __entry->abs_from = abs_from;
1366 __entry->abs_to = abs_to;
1367 __entry->index = index;
1368 __entry->zero_from = zero_from;
1369 __entry->zero_to = zero_to;
1370 ),
1371 TP_printk("%llu %llu %llu %lu %u %u", __entry->ino,
1372 __entry->abs_from, __entry->abs_to,
1373 __entry->index, __entry->zero_from, __entry->zero_to)
1374);
1375
1376DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_zero_extend_range);
1377
1378DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_zero_extend);
1379
1380TRACE_EVENT(ocfs2_setattr,
1381 TP_PROTO(void *inode, void *dentry,
1382 unsigned long long ino,
1383 unsigned int d_len, const unsigned char *d_name,
1384 unsigned int ia_valid, unsigned int ia_mode,
1385 unsigned int ia_uid, unsigned int ia_gid),
1386 TP_ARGS(inode, dentry, ino, d_len, d_name,
1387 ia_valid, ia_mode, ia_uid, ia_gid),
1388 TP_STRUCT__entry(
1389 __field(void *, inode)
1390 __field(void *, dentry)
1391 __field(unsigned long long, ino)
1392 __field(unsigned int, d_len)
1393 __string(d_name, d_name)
1394 __field(unsigned int, ia_valid)
1395 __field(unsigned int, ia_mode)
1396 __field(unsigned int, ia_uid)
1397 __field(unsigned int, ia_gid)
1398 ),
1399 TP_fast_assign(
1400 __entry->inode = inode;
1401 __entry->dentry = dentry;
1402 __entry->ino = ino;
1403 __entry->d_len = d_len;
1404 __assign_str(d_name, d_name);
1405 __entry->ia_valid = ia_valid;
1406 __entry->ia_mode = ia_mode;
1407 __entry->ia_uid = ia_uid;
1408 __entry->ia_gid = ia_gid;
1409 ),
1410 TP_printk("%p %p %llu %.*s %u %u %u %u", __entry->inode,
1411 __entry->dentry, __entry->ino, __entry->d_len,
1412 __get_str(d_name), __entry->ia_valid, __entry->ia_mode,
1413 __entry->ia_uid, __entry->ia_gid)
1414);
1415
1416DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_write_remove_suid);
1417
1418DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_zero_partial_clusters);
1419
1420DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_zero_partial_clusters_range1);
1421
1422DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_zero_partial_clusters_range2);
1423
1424DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_remove_inode_range);
1425
1426TRACE_EVENT(ocfs2_prepare_inode_for_write,
1427 TP_PROTO(unsigned long long ino, unsigned long long saved_pos,
1428 int appending, unsigned long count,
1429 int *direct_io, int *has_refcount),
1430 TP_ARGS(ino, saved_pos, appending, count, direct_io, has_refcount),
1431 TP_STRUCT__entry(
1432 __field(unsigned long long, ino)
1433 __field(unsigned long long, saved_pos)
1434 __field(int, appending)
1435 __field(unsigned long, count)
1436 __field(int, direct_io)
1437 __field(int, has_refcount)
1438 ),
1439 TP_fast_assign(
1440 __entry->ino = ino;
1441 __entry->saved_pos = saved_pos;
1442 __entry->appending = appending;
1443 __entry->count = count;
1444 __entry->direct_io = direct_io ? *direct_io : -1;
1445 __entry->has_refcount = has_refcount ? *has_refcount : -1;
1446 ),
1447 TP_printk("%llu %llu %d %lu %d %d", __entry->ino,
1448 __entry->saved_pos, __entry->appending, __entry->count,
1449 __entry->direct_io, __entry->has_refcount)
1450);
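
/*
 * The ternaries in TP_fast_assign() above guard against NULL: when the
 * caller does not supply direct_io or has_refcount, -1 is recorded as
 * a sentinel instead of dereferencing a NULL pointer.
 */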
1451
1452DEFINE_OCFS2_INT_EVENT(generic_file_aio_read_ret);
1453
1454/* End of trace events for fs/ocfs2/file.c. */
1455
1456/* Trace events for fs/ocfs2/inode.c. */
1457
1458TRACE_EVENT(ocfs2_iget_begin,
1459 TP_PROTO(unsigned long long ino, unsigned int flags, int sysfile_type),
1460 TP_ARGS(ino, flags, sysfile_type),
1461 TP_STRUCT__entry(
1462 __field(unsigned long long, ino)
1463 __field(unsigned int, flags)
1464 __field(int, sysfile_type)
1465 ),
1466 TP_fast_assign(
1467 __entry->ino = ino;
1468 __entry->flags = flags;
1469 __entry->sysfile_type = sysfile_type;
1470 ),
1471 TP_printk("%llu %u %d", __entry->ino,
1472 __entry->flags, __entry->sysfile_type)
1473);
1474
1475DEFINE_OCFS2_ULL_EVENT(ocfs2_iget5_locked);
1476
1477TRACE_EVENT(ocfs2_iget_end,
1478 TP_PROTO(void *inode, unsigned long long ino),
1479 TP_ARGS(inode, ino),
1480 TP_STRUCT__entry(
1481 __field(void *, inode)
1482 __field(unsigned long long, ino)
1483 ),
1484 TP_fast_assign(
1485 __entry->inode = inode;
1486 __entry->ino = ino;
1487 ),
1488 TP_printk("%p %llu", __entry->inode, __entry->ino)
1489);
1490
1491TRACE_EVENT(ocfs2_find_actor,
1492 TP_PROTO(void *inode, unsigned long long ino,
1493 void *args, unsigned long long fi_blkno),
1494 TP_ARGS(inode, ino, args, fi_blkno),
1495 TP_STRUCT__entry(
1496 __field(void *, inode)
1497 __field(unsigned long long, ino)
1498 __field(void *, args)
1499 __field(unsigned long long, fi_blkno)
1500 ),
1501 TP_fast_assign(
1502 __entry->inode = inode;
1503 __entry->ino = ino;
1504 __entry->args = args;
1505 __entry->fi_blkno = fi_blkno;
1506 ),
1507 TP_printk("%p %llu %p %llu", __entry->inode, __entry->ino,
1508 __entry->args, __entry->fi_blkno)
1509);
1510
1511DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_populate_inode);
1512
1513DEFINE_OCFS2_ULL_INT_EVENT(ocfs2_read_locked_inode);
1514
1515DEFINE_OCFS2_INT_INT_EVENT(ocfs2_check_orphan_recovery_state);
1516
1517DEFINE_OCFS2_ULL_EVENT(ocfs2_validate_inode_block);
1518
1519TRACE_EVENT(ocfs2_inode_is_valid_to_delete,
1520 TP_PROTO(void *task, void *dc_task, unsigned long long ino,
1521 unsigned int flags),
1522 TP_ARGS(task, dc_task, ino, flags),
1523 TP_STRUCT__entry(
1524 __field(void *, task)
1525 __field(void *, dc_task)
1526 __field(unsigned long long, ino)
1527 __field(unsigned int, flags)
1528 ),
1529 TP_fast_assign(
1530 __entry->task = task;
1531 __entry->dc_task = dc_task;
1532 __entry->ino = ino;
1533 __entry->flags = flags;
1534 ),
1535 TP_printk("%p %p %llu %u", __entry->task, __entry->dc_task,
1536 __entry->ino, __entry->flags)
1537);
1538
1539DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_query_inode_wipe_begin);
1540
1541DEFINE_OCFS2_UINT_EVENT(ocfs2_query_inode_wipe_succ);
1542
1543DEFINE_OCFS2_INT_INT_EVENT(ocfs2_query_inode_wipe_end);
1544
1545DEFINE_OCFS2_ULL_INT_EVENT(ocfs2_cleanup_delete_inode);
1546
1547DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_delete_inode);
1548
1549DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_clear_inode);
1550
1551DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_drop_inode);
1552
1553TRACE_EVENT(ocfs2_inode_revalidate,
1554 TP_PROTO(void *inode, unsigned long long ino,
1555 unsigned int flags),
1556 TP_ARGS(inode, ino, flags),
1557 TP_STRUCT__entry(
1558 __field(void *, inode)
1559 __field(unsigned long long, ino)
1560 __field(unsigned int, flags)
1561 ),
1562 TP_fast_assign(
1563 __entry->inode = inode;
1564 __entry->ino = ino;
1565 __entry->flags = flags;
1566 ),
1567 TP_printk("%p %llu %u", __entry->inode, __entry->ino, __entry->flags)
1568);
1569
1570DEFINE_OCFS2_ULL_EVENT(ocfs2_mark_inode_dirty);
1571
1572/* End of trace events for fs/ocfs2/inode.c. */
1573
1574/* Trace events for fs/ocfs2/extent_map.c. */
1575
1576TRACE_EVENT(ocfs2_read_virt_blocks,
1577 TP_PROTO(void *inode, unsigned long long vblock, int nr,
1578 void *bhs, unsigned int flags, void *validate),
1579 TP_ARGS(inode, vblock, nr, bhs, flags, validate),
1580 TP_STRUCT__entry(
1581 __field(void *, inode)
1582 __field(unsigned long long, vblock)
1583 __field(int, nr)
1584 __field(void *, bhs)
1585 __field(unsigned int, flags)
1586 __field(void *, validate)
1587 ),
1588 TP_fast_assign(
1589 __entry->inode = inode;
1590 __entry->vblock = vblock;
1591 __entry->nr = nr;
1592 __entry->bhs = bhs;
1593 __entry->flags = flags;
1594 __entry->validate = validate;
1595 ),
1596 TP_printk("%p %llu %d %p %x %p", __entry->inode, __entry->vblock,
1597 __entry->nr, __entry->bhs, __entry->flags, __entry->validate)
1598);
1599
1600/* End of trace events for fs/ocfs2/extent_map.c. */
1601
1602/* Trace events for fs/ocfs2/slot_map.c. */
1603
1604DEFINE_OCFS2_UINT_EVENT(ocfs2_refresh_slot_info);
1605
1606DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_map_slot_buffers);
1607
1608DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_map_slot_buffers_block);
1609
1610DEFINE_OCFS2_INT_EVENT(ocfs2_find_slot);
1611
1612/* End of trace events for fs/ocfs2/slot_map.c. */
1613
1614/* Trace events for fs/ocfs2/heartbeat.c. */
1615
1616DEFINE_OCFS2_INT_EVENT(ocfs2_do_node_down);
1617
1618/* End of trace events for fs/ocfs2/heartbeat.c. */
1619
1620/* Trace events for fs/ocfs2/super.c. */
1621
1622TRACE_EVENT(ocfs2_remount,
1623 TP_PROTO(unsigned long s_flags, unsigned long osb_flags, int flags),
1624 TP_ARGS(s_flags, osb_flags, flags),
1625 TP_STRUCT__entry(
1626 __field(unsigned long, s_flags)
1627 __field(unsigned long, osb_flags)
1628 __field(int, flags)
1629 ),
1630 TP_fast_assign(
1631 __entry->s_flags = s_flags;
1632 __entry->osb_flags = osb_flags;
1633 __entry->flags = flags;
1634 ),
1635 TP_printk("%lu %lu %d", __entry->s_flags,
1636 __entry->osb_flags, __entry->flags)
1637);
1638
1639TRACE_EVENT(ocfs2_fill_super,
1640 TP_PROTO(void *sb, void *data, int silent),
1641 TP_ARGS(sb, data, silent),
1642 TP_STRUCT__entry(
1643 __field(void *, sb)
1644 __field(void *, data)
1645 __field(int, silent)
1646 ),
1647 TP_fast_assign(
1648 __entry->sb = sb;
1649 __entry->data = data;
1650 __entry->silent = silent;
1651 ),
1652 TP_printk("%p %p %d", __entry->sb,
1653 __entry->data, __entry->silent)
1654);
1655
1656TRACE_EVENT(ocfs2_parse_options,
1657 TP_PROTO(int is_remount, char *options),
1658 TP_ARGS(is_remount, options),
1659 TP_STRUCT__entry(
1660 __field(int, is_remount)
1661 __string(options, options)
1662 ),
1663 TP_fast_assign(
1664 __entry->is_remount = is_remount;
1665 __assign_str(options, options);
1666 ),
1667 TP_printk("%d %s", __entry->is_remount, __get_str(options))
1668);
1669
1670DEFINE_OCFS2_POINTER_EVENT(ocfs2_put_super);
1671
1672TRACE_EVENT(ocfs2_statfs,
1673 TP_PROTO(void *sb, void *buf),
1674 TP_ARGS(sb, buf),
1675 TP_STRUCT__entry(
1676 __field(void *, sb)
1677 __field(void *, buf)
1678 ),
1679 TP_fast_assign(
1680 __entry->sb = sb;
1681 __entry->buf = buf;
1682 ),
1683 TP_printk("%p %p", __entry->sb, __entry->buf)
1684);
1685
1686DEFINE_OCFS2_POINTER_EVENT(ocfs2_dismount_volume);
1687
1688TRACE_EVENT(ocfs2_initialize_super,
1689 TP_PROTO(char *label, char *uuid_str, unsigned long long root_dir,
1690 unsigned long long system_dir, int cluster_bits),
1691 TP_ARGS(label, uuid_str, root_dir, system_dir, cluster_bits),
1692 TP_STRUCT__entry(
1693 __string(label, label)
1694 __string(uuid_str, uuid_str)
1695 __field(unsigned long long, root_dir)
1696 __field(unsigned long long, system_dir)
1697 __field(int, cluster_bits)
1698 ),
1699 TP_fast_assign(
1700 __assign_str(label, label);
1701 __assign_str(uuid_str, uuid_str);
1702 __entry->root_dir = root_dir;
1703 __entry->system_dir = system_dir;
1704 __entry->cluster_bits = cluster_bits;
1705 ),
1706 TP_printk("%s %s %llu %llu %d", __get_str(label), __get_str(uuid_str),
1707 __entry->root_dir, __entry->system_dir, __entry->cluster_bits)
1708);
1709
1710/* End of trace events for fs/ocfs2/super.c. */
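
/*
 * Runtime-enable sketch (assuming this header sets TRACE_SYSTEM to
 * ocfs2 outside the excerpt shown): on a kernel built with tracing,
 * any event defined here can be toggled through debugfs, e.g.
 *
 *	echo 1 > /sys/kernel/debug/tracing/events/ocfs2/ocfs2_fill_super/enable
 *	cat /sys/kernel/debug/tracing/trace_pipe
 */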
1711
1712/* Trace events for fs/ocfs2/xattr.c. */
1713
1714DEFINE_OCFS2_ULL_EVENT(ocfs2_validate_xattr_block);
1715
1716DEFINE_OCFS2_UINT_EVENT(ocfs2_xattr_extend_allocation);
1717
1718TRACE_EVENT(ocfs2_init_xattr_set_ctxt,
1719 TP_PROTO(const char *name, int meta, int clusters, int credits),
1720 TP_ARGS(name, meta, clusters, credits),
1721 TP_STRUCT__entry(
1722 __string(name, name)
1723 __field(int, meta)
1724 __field(int, clusters)
1725 __field(int, credits)
1726 ),
1727 TP_fast_assign(
1728 __assign_str(name, name);
1729 __entry->meta = meta;
1730 __entry->clusters = clusters;
1731 __entry->credits = credits;
1732 ),
1733 TP_printk("%s %d %d %d", __get_str(name), __entry->meta,
1734 __entry->clusters, __entry->credits)
1735);
1736
1737DECLARE_EVENT_CLASS(ocfs2__xattr_find,
1738 TP_PROTO(unsigned long long ino, const char *name, int name_index,
1739 unsigned int hash, unsigned long long location,
1740 int xe_index),
1741 TP_ARGS(ino, name, name_index, hash, location, xe_index),
1742 TP_STRUCT__entry(
1743 __field(unsigned long long, ino)
1744 __string(name, name)
1745 __field(int, name_index)
1746 __field(unsigned int, hash)
1747 __field(unsigned long long, location)
1748 __field(int, xe_index)
1749 ),
1750 TP_fast_assign(
1751 __entry->ino = ino;
1752 __assign_str(name, name);
1753 __entry->name_index = name_index;
1754 __entry->hash = hash;
1755 __entry->location = location;
1756 __entry->xe_index = xe_index;
1757 ),
1758 TP_printk("%llu %s %d %u %llu %d", __entry->ino, __get_str(name),
1759 __entry->name_index, __entry->hash, __entry->location,
1760 __entry->xe_index)
1761);
1762
1763#define DEFINE_OCFS2_XATTR_FIND_EVENT(name) \
1764DEFINE_EVENT(ocfs2__xattr_find, name, \
1765TP_PROTO(unsigned long long ino, const char *name, int name_index, \
1766 unsigned int hash, unsigned long long bucket, \
1767 int xe_index), \
1768 TP_ARGS(ino, name, name_index, hash, bucket, xe_index))
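
/*
 * Note: a DEFINE_EVENT() prototype matches its event class
 * positionally, not by parameter name, so renaming the fifth argument
 * from "location" in ocfs2__xattr_find to "bucket" here is cosmetic;
 * the value still lands in the class's "location" field.
 */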
1769
1770DEFINE_OCFS2_XATTR_FIND_EVENT(ocfs2_xattr_bucket_find);
1771
1772DEFINE_OCFS2_XATTR_FIND_EVENT(ocfs2_xattr_index_block_find);
1773
1774DEFINE_OCFS2_XATTR_FIND_EVENT(ocfs2_xattr_index_block_find_rec);
1775
1776DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_iterate_xattr_buckets);
1777
1778DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_iterate_xattr_bucket);
1779
1780DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_cp_xattr_block_to_bucket_begin);
1781
1782DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_cp_xattr_block_to_bucket_end);
1783
1784DEFINE_OCFS2_ULL_EVENT(ocfs2_xattr_create_index_block_begin);
1785
1786DEFINE_OCFS2_ULL_EVENT(ocfs2_xattr_create_index_block);
1787
1788DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_defrag_xattr_bucket);
1789
1790DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_mv_xattr_bucket_cross_cluster);
1791
1792DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_divide_xattr_bucket_begin);
1793
1794DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_divide_xattr_bucket_move);
1795
1796DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_cp_xattr_bucket);
1797
1798DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_mv_xattr_buckets);
1799
1800DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_adjust_xattr_cross_cluster);
1801
1802DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(ocfs2_add_new_xattr_cluster_begin);
1803
1804DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_add_new_xattr_cluster);
1805
1806DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_add_new_xattr_cluster_insert);
1807
1808DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(ocfs2_extend_xattr_bucket);
1809
1810DEFINE_OCFS2_ULL_EVENT(ocfs2_add_new_xattr_bucket);
1811
1812DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_xattr_bucket_value_truncate);
1813
1814DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(ocfs2_rm_xattr_cluster);
1815
1816DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_reflink_xattr_header);
1817
1818DEFINE_OCFS2_ULL_INT_EVENT(ocfs2_create_empty_xattr_block);
1819
1820DEFINE_OCFS2_STRING_EVENT(ocfs2_xattr_set_entry_bucket);
1821
1822DEFINE_OCFS2_STRING_EVENT(ocfs2_xattr_set_entry_index_block);
1823
1824DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_xattr_bucket_value_refcount);
1825
1826DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_reflink_xattr_buckets);
1827
1828DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_reflink_xattr_rec);
1829
1830/* End of trace events for fs/ocfs2/xattr.c. */
1831
1832/* Trace events for fs/ocfs2/reservations.c. */
1833
1834DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_resv_insert);
1835
1836DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_resmap_find_free_bits_begin);
1837
1838DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_resmap_find_free_bits_end);
1839
1840TRACE_EVENT(ocfs2_resv_find_window_begin,
1841 TP_PROTO(unsigned int r_start, unsigned int r_end, unsigned int goal,
1842 unsigned int wanted, int empty_root),
1843 TP_ARGS(r_start, r_end, goal, wanted, empty_root),
1844 TP_STRUCT__entry(
1845 __field(unsigned int, r_start)
1846 __field(unsigned int, r_end)
1847 __field(unsigned int, goal)
1848 __field(unsigned int, wanted)
1849 __field(int, empty_root)
1850 ),
1851 TP_fast_assign(
1852 __entry->r_start = r_start;
1853 __entry->r_end = r_end;
1854 __entry->goal = goal;
1855 __entry->wanted = wanted;
1856 __entry->empty_root = empty_root;
1857 ),
1858 TP_printk("%u %u %u %u %d", __entry->r_start, __entry->r_end,
1859 __entry->goal, __entry->wanted, __entry->empty_root)
1860);
1861
1862DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_resv_find_window_prev);
1863
1864DEFINE_OCFS2_INT_INT_EVENT(ocfs2_resv_find_window_next);
1865
1866DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_cannibalize_resv_begin);
1867
1868TRACE_EVENT(ocfs2_cannibalize_resv_end,
1869 TP_PROTO(unsigned int start, unsigned int end, unsigned int len,
1870 unsigned int last_start, unsigned int last_len),
1871 TP_ARGS(start, end, len, last_start, last_len),
1872 TP_STRUCT__entry(
1873 __field(unsigned int, start)
1874 __field(unsigned int, end)
1875 __field(unsigned int, len)
1876 __field(unsigned int, last_start)
1877 __field(unsigned int, last_len)
1878 ),
1879 TP_fast_assign(
1880 __entry->start = start;
1881 __entry->end = end;
1882 __entry->len = len;
1883 __entry->last_start = last_start;
1884 __entry->last_len = last_len;
1885 ),
1886 TP_printk("%u %u %u %u %u", __entry->start, __entry->end,
1887 __entry->len, __entry->last_start, __entry->last_len)
1888);
1889
1890DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_resmap_resv_bits);
1891
1892TRACE_EVENT(ocfs2_resmap_claimed_bits_begin,
1893 TP_PROTO(unsigned int cstart, unsigned int cend, unsigned int clen,
1894 unsigned int r_start, unsigned int r_end, unsigned int r_len,
1895 unsigned int last_start, unsigned int last_len),
1896 TP_ARGS(cstart, cend, clen, r_start, r_end,
1897 r_len, last_start, last_len),
1898 TP_STRUCT__entry(
1899 __field(unsigned int, cstart)
1900 __field(unsigned int, cend)
1901 __field(unsigned int, clen)
1902 __field(unsigned int, r_start)
1903 __field(unsigned int, r_end)
1904 __field(unsigned int, r_len)
1905 __field(unsigned int, last_start)
1906 __field(unsigned int, last_len)
1907 ),
1908 TP_fast_assign(
1909 __entry->cstart = cstart;
1910 __entry->cend = cend;
1911 __entry->clen = clen;
1912 __entry->r_start = r_start;
1913 __entry->r_end = r_end;
1914 __entry->r_len = r_len;
1915 __entry->last_start = last_start;
1916 __entry->last_len = last_len;
1917 ),
1918 TP_printk("%u %u %u %u %u %u %u %u",
1919 __entry->cstart, __entry->cend, __entry->clen,
1920 __entry->r_start, __entry->r_end, __entry->r_len,
1921 __entry->last_start, __entry->last_len)
1922);
1923
1924TRACE_EVENT(ocfs2_resmap_claimed_bits_end,
1925 TP_PROTO(unsigned int start, unsigned int end, unsigned int len,
1926 unsigned int last_start, unsigned int last_len),
1927 TP_ARGS(start, end, len, last_start, last_len),
1928 TP_STRUCT__entry(
1929 __field(unsigned int, start)
1930 __field(unsigned int, end)
1931 __field(unsigned int, len)
1932 __field(unsigned int, last_start)
1933 __field(unsigned int, last_len)
1934 ),
1935 TP_fast_assign(
1936 __entry->start = start;
1937 __entry->end = end;
1938 __entry->len = len;
1939 __entry->last_start = last_start;
1940 __entry->last_len = last_len;
1941 ),
1942 TP_printk("%u %u %u %u %u", __entry->start, __entry->end,
1943 __entry->len, __entry->last_start, __entry->last_len)
1944);
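
/*
 * The *_begin/*_end pairs in this section bracket a single reservation
 * operation; correlating the two records in the trace output (for
 * example by task and timestamp) shows how the claimed window was
 * adjusted between entry and exit.
 */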
1945
1946/* End of trace events for fs/ocfs2/reservations.c. */
1947
1948/* Trace events for fs/ocfs2/quota_local.c. */
1949
1950DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_recover_local_quota_file);
1951
1952DEFINE_OCFS2_INT_EVENT(ocfs2_finish_quota_recovery);
1953
1954DEFINE_OCFS2_ULL_ULL_UINT_EVENT(olq_set_dquot);
1955
1956/* End of trace events for fs/ocfs2/quota_local.c. */
1957
1958/* Trace events for fs/ocfs2/quota_global.c. */
1959
1960DEFINE_OCFS2_ULL_EVENT(ocfs2_validate_quota_block);
1961
1962TRACE_EVENT(ocfs2_sync_dquot,
1963 TP_PROTO(unsigned int dq_id, long long dqb_curspace,
1964 long long spacechange, long long curinodes,
1965 long long inodechange),
1966 TP_ARGS(dq_id, dqb_curspace, spacechange, curinodes, inodechange),
1967 TP_STRUCT__entry(
1968 __field(unsigned int, dq_id)
1969 __field(long long, dqb_curspace)
1970 __field(long long, spacechange)
1971 __field(long long, curinodes)
1972 __field(long long, inodechange)
1973 ),
1974 TP_fast_assign(
1975 __entry->dq_id = dq_id;
1976 __entry->dqb_curspace = dqb_curspace;
1977 __entry->spacechange = spacechange;
1978 __entry->curinodes = curinodes;
1979 __entry->inodechange = inodechange;
1980 ),
1981 TP_printk("%u %lld %lld %lld %lld", __entry->dq_id,
1982 __entry->dqb_curspace, __entry->spacechange,
1983 __entry->curinodes, __entry->inodechange)
1984);
1985
1986TRACE_EVENT(ocfs2_sync_dquot_helper,
1987 TP_PROTO(unsigned int dq_id, unsigned int dq_type, unsigned long type,
1988 const char *s_id),
1989	TP_ARGS(dq_id, dq_type, type, s_id),
1991	TP_STRUCT__entry(
1992 __field(unsigned int, dq_id)
1993 __field(unsigned int, dq_type)
1994 __field(unsigned long, type)
1995 __string(s_id, s_id)
1996 ),
1997 TP_fast_assign(
1998 __entry->dq_id = dq_id;
1999 __entry->dq_type = dq_type;
2000 __entry->type = type;
2001 __assign_str(s_id, s_id);
2002 ),
2003 TP_printk("%u %u %lu %s", __entry->dq_id, __entry->dq_type,
2004 __entry->type, __get_str(s_id))
2005);
2006
2007DEFINE_OCFS2_UINT_INT_EVENT(ocfs2_write_dquot);
2008
2009DEFINE_OCFS2_UINT_INT_EVENT(ocfs2_release_dquot);
2010
2011DEFINE_OCFS2_UINT_INT_EVENT(ocfs2_acquire_dquot);
2012
2013DEFINE_OCFS2_UINT_INT_EVENT(ocfs2_mark_dquot_dirty);
2014
2015/* End of trace events for fs/ocfs2/quota_global.c. */
2016
2017/* Trace events for fs/ocfs2/dir.c. */

2018DEFINE_OCFS2_INT_EVENT(ocfs2_search_dirblock);
2019
2020DEFINE_OCFS2_ULL_EVENT(ocfs2_validate_dir_block);
2021
2022DEFINE_OCFS2_POINTER_EVENT(ocfs2_find_entry_el);
2023
2024TRACE_EVENT(ocfs2_dx_dir_search,
2025 TP_PROTO(unsigned long long ino, int namelen, const char *name,
2026 unsigned int major_hash, unsigned int minor_hash,
2027 unsigned long long blkno),
2028 TP_ARGS(ino, namelen, name, major_hash, minor_hash, blkno),
2029 TP_STRUCT__entry(
2030 __field(unsigned long long, ino)
2031 __field(int, namelen)
2032 __string(name, name)
2033 __field(unsigned int, major_hash)
2034		__field(unsigned int, minor_hash)
2035 __field(unsigned long long, blkno)
2036 ),
2037 TP_fast_assign(
2038 __entry->ino = ino;
2039 __entry->namelen = namelen;
2040 __assign_str(name, name);
2041 __entry->major_hash = major_hash;
2042 __entry->minor_hash = minor_hash;
2043 __entry->blkno = blkno;
2044 ),
2045 TP_printk("%llu %.*s %u %u %llu", __entry->ino,
2046 __entry->namelen, __get_str(name),
2047 __entry->major_hash, __entry->minor_hash, __entry->blkno)
2048);
2049
2050DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_dx_dir_search_leaf_info);
2051
2052DEFINE_OCFS2_ULL_INT_EVENT(ocfs2_delete_entry_dx);
2053
2054DEFINE_OCFS2_ULL_EVENT(ocfs2_readdir);
2055
2056TRACE_EVENT(ocfs2_find_files_on_disk,
2057 TP_PROTO(int namelen, const char *name, void *blkno,
2058 unsigned long long dir),
2059 TP_ARGS(namelen, name, blkno, dir),
2060 TP_STRUCT__entry(
2061 __field(int, namelen)
2062 __string(name, name)
2063 __field(void *, blkno)
2064 __field(unsigned long long, dir)
2065 ),
2066 TP_fast_assign(
2067 __entry->namelen = namelen;
2068 __assign_str(name, name);
2069 __entry->blkno = blkno;
2070 __entry->dir = dir;
2071 ),
2072 TP_printk("%.*s %p %llu", __entry->namelen, __get_str(name),
2073 __entry->blkno, __entry->dir)
2074);
2075
2076TRACE_EVENT(ocfs2_check_dir_for_entry,
2077 TP_PROTO(unsigned long long dir, int namelen, const char *name),
2078 TP_ARGS(dir, namelen, name),
2079 TP_STRUCT__entry(
2080 __field(unsigned long long, dir)
2081 __field(int, namelen)
2082 __string(name, name)
2083 ),
2084 TP_fast_assign(
2085 __entry->dir = dir;
2086 __entry->namelen = namelen;
2087 __assign_str(name, name);
2088 ),
2089 TP_printk("%llu %.*s", __entry->dir,
2090 __entry->namelen, __get_str(name))
2091);
2092
2093DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_dx_dir_attach_index);
2094
2095DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_dx_dir_format_cluster);
2096
2097TRACE_EVENT(ocfs2_dx_dir_index_root_block,
2098 TP_PROTO(unsigned long long dir,
2099 unsigned int major_hash, unsigned int minor_hash,
2100 int namelen, const char *name, unsigned int num_used),
2101 TP_ARGS(dir, major_hash, minor_hash, namelen, name, num_used),
2102 TP_STRUCT__entry(
2103 __field(unsigned long long, dir)
2104 __field(unsigned int, major_hash)
2105 __field(unsigned int, minor_hash)
2106 __field(int, namelen)
2107 __string(name, name)
2108 __field(unsigned int, num_used)
2109 ),
2110 TP_fast_assign(
2111 __entry->dir = dir;
2112 __entry->major_hash = major_hash;
2113 __entry->minor_hash = minor_hash;
2114 __entry->namelen = namelen;
2115 __assign_str(name, name);
2116 __entry->num_used = num_used;
2117 ),
2118 TP_printk("%llu %x %x %.*s %u", __entry->dir,
2119 __entry->major_hash, __entry->minor_hash,
2120 __entry->namelen, __get_str(name), __entry->num_used)
2121);
2122
2123DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_extend_dir);
2124
2125DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_dx_dir_rebalance);
2126
2127DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_dx_dir_rebalance_split);
2128
2129DEFINE_OCFS2_ULL_INT_EVENT(ocfs2_prepare_dir_for_insert);
2130
2131/* End of trace events for fs/ocfs2/dir.c. */
2132
2133/* Trace events for fs/ocfs2/namei.c. */
2134
2135DECLARE_EVENT_CLASS(ocfs2__dentry_ops,
2136 TP_PROTO(void *dir, void *dentry, int name_len, const char *name,
2137 unsigned long long dir_blkno, unsigned long long extra),
2138 TP_ARGS(dir, dentry, name_len, name, dir_blkno, extra),
2139 TP_STRUCT__entry(
2140 __field(void *, dir)
2141 __field(void *, dentry)
2142 __field(int, name_len)
2143 __string(name, name)
2144 __field(unsigned long long, dir_blkno)
2145 __field(unsigned long long, extra)
2146 ),
2147 TP_fast_assign(
2148 __entry->dir = dir;
2149 __entry->dentry = dentry;
2150 __entry->name_len = name_len;
2151 __assign_str(name, name);
2152 __entry->dir_blkno = dir_blkno;
2153 __entry->extra = extra;
2154 ),
2155 TP_printk("%p %p %.*s %llu %llu", __entry->dir, __entry->dentry,
2156 __entry->name_len, __get_str(name),
2157 __entry->dir_blkno, __entry->extra)
2158);
2159
2160#define DEFINE_OCFS2_DENTRY_OPS(name) \
2161DEFINE_EVENT(ocfs2__dentry_ops, name, \
2162TP_PROTO(void *dir, void *dentry, int name_len, const char *name, \
2163 unsigned long long dir_blkno, unsigned long long extra), \
2164 TP_ARGS(dir, dentry, name_len, name, dir_blkno, extra))
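
/*
 * The trailing "extra" argument gives the ocfs2__dentry_ops class one
 * generic per-operation payload.  An assumed call site for the lookup
 * instance below (the dir_blkno expression and 0ULL payload are
 * assumptions):
 *
 *	trace_ocfs2_lookup(dir, dentry, dentry->d_name.len,
 *			   dentry->d_name.name,
 *			   (unsigned long long)OCFS2_I(dir)->ip_blkno,
 *			   0ULL);
 */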
2165
2166DEFINE_OCFS2_DENTRY_OPS(ocfs2_lookup);
2167
2168DEFINE_OCFS2_DENTRY_OPS(ocfs2_mkdir);
2169
2170DEFINE_OCFS2_DENTRY_OPS(ocfs2_create);
2171
2172DEFINE_OCFS2_DENTRY_OPS(ocfs2_unlink);
2173
2174DEFINE_OCFS2_DENTRY_OPS(ocfs2_symlink_create);
2175
2176DEFINE_OCFS2_DENTRY_OPS(ocfs2_mv_orphaned_inode_to_new);
2177
2178DEFINE_OCFS2_POINTER_EVENT(ocfs2_lookup_ret);
2179
2180TRACE_EVENT(ocfs2_mknod,
2181 TP_PROTO(void *dir, void *dentry, int name_len, const char *name,
2182 unsigned long long dir_blkno, unsigned long dev, int mode),
2183 TP_ARGS(dir, dentry, name_len, name, dir_blkno, dev, mode),
2184 TP_STRUCT__entry(
2185 __field(void *, dir)
2186 __field(void *, dentry)
2187 __field(int, name_len)
2188 __string(name, name)
2189 __field(unsigned long long, dir_blkno)
2190 __field(unsigned long, dev)
2191 __field(int, mode)
2192 ),
2193 TP_fast_assign(
2194 __entry->dir = dir;
2195 __entry->dentry = dentry;
2196 __entry->name_len = name_len;
2197 __assign_str(name, name);
2198 __entry->dir_blkno = dir_blkno;
2199 __entry->dev = dev;
2200 __entry->mode = mode;
2201 ),
2202 TP_printk("%p %p %.*s %llu %lu %d", __entry->dir, __entry->dentry,
2203 __entry->name_len, __get_str(name),
2204 __entry->dir_blkno, __entry->dev, __entry->mode)
2205);
2206
2207TRACE_EVENT(ocfs2_link,
2208 TP_PROTO(unsigned long long ino, int old_len, const char *old_name,
2209 int name_len, const char *name),
2210 TP_ARGS(ino, old_len, old_name, name_len, name),
2211 TP_STRUCT__entry(
2212 __field(unsigned long long, ino)
2213 __field(int, old_len)
2214 __string(old_name, old_name)
2215 __field(int, name_len)
2216 __string(name, name)
2217 ),
2218 TP_fast_assign(
2219 __entry->ino = ino;
2220 __entry->old_len = old_len;
2221 __assign_str(old_name, old_name);
2222 __entry->name_len = name_len;
2223 __assign_str(name, name);
2224 ),
2225 TP_printk("%llu %.*s %.*s", __entry->ino,
2226 __entry->old_len, __get_str(old_name),
2227 __entry->name_len, __get_str(name))
2228);
2229
2230DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_unlink_noent);
2231
2232DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_double_lock);
2233
2234DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_double_lock_end);
2235
2236TRACE_EVENT(ocfs2_rename,
2237 TP_PROTO(void *old_dir, void *old_dentry,
2238 void *new_dir, void *new_dentry,
2239 int old_len, const char *old_name,
2240 int new_len, const char *new_name),
2241 TP_ARGS(old_dir, old_dentry, new_dir, new_dentry,
2242 old_len, old_name, new_len, new_name),
2243 TP_STRUCT__entry(
2244 __field(void *, old_dir)
2245 __field(void *, old_dentry)
2246 __field(void *, new_dir)
2247 __field(void *, new_dentry)
2248 __field(int, old_len)
2249 __string(old_name, old_name)
2250 __field(int, new_len)
2251 __string(new_name, new_name)
2252 ),
2253 TP_fast_assign(
2254 __entry->old_dir = old_dir;
2255 __entry->old_dentry = old_dentry;
2256 __entry->new_dir = new_dir;
2257 __entry->new_dentry = new_dentry;
2258 __entry->old_len = old_len;
2259 __assign_str(old_name, old_name);
2260 __entry->new_len = new_len;
2261 __assign_str(new_name, new_name);
2262 ),
2263 TP_printk("%p %p %p %p %.*s %.*s",
2264 __entry->old_dir, __entry->old_dentry,
2265 __entry->new_dir, __entry->new_dentry,
2266 __entry->old_len, __get_str(old_name),
2267 __entry->new_len, __get_str(new_name))
2268);
2269
2270TRACE_EVENT(ocfs2_rename_target_exists,
2271 TP_PROTO(int new_len, const char *new_name),
2272 TP_ARGS(new_len, new_name),
2273 TP_STRUCT__entry(
2274 __field(int, new_len)
2275 __string(new_name, new_name)
2276 ),
2277 TP_fast_assign(
2278 __entry->new_len = new_len;
2279 __assign_str(new_name, new_name);
2280 ),
2281 TP_printk("%.*s", __entry->new_len, __get_str(new_name))
2282);
2283
2284DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_rename_disagree);
2285
2286TRACE_EVENT(ocfs2_rename_over_existing,
2287 TP_PROTO(unsigned long long new_blkno, void *new_bh,
2288 unsigned long long newdi_blkno),
2289 TP_ARGS(new_blkno, new_bh, newdi_blkno),
2290 TP_STRUCT__entry(
2291 __field(unsigned long long, new_blkno)
2292 __field(void *, new_bh)
2293 __field(unsigned long long, newdi_blkno)
2294 ),
2295 TP_fast_assign(
2296 __entry->new_blkno = new_blkno;
2297 __entry->new_bh = new_bh;
2298 __entry->newdi_blkno = newdi_blkno;
2299 ),
2300 TP_printk("%llu %p %llu", __entry->new_blkno, __entry->new_bh,
2301 __entry->newdi_blkno)
2302);
2303
2304DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_create_symlink_data);
2305
2306TRACE_EVENT(ocfs2_symlink_begin,
2307 TP_PROTO(void *dir, void *dentry, const char *symname,
2308 int len, const char *name),
2309 TP_ARGS(dir, dentry, symname, len, name),
2310 TP_STRUCT__entry(
2311 __field(void *, dir)
2312 __field(void *, dentry)
2313 __field(const char *, symname)
2314 __field(int, len)
2315 __string(name, name)
2316 ),
2317 TP_fast_assign(
2318 __entry->dir = dir;
2319 __entry->dentry = dentry;
2320 __entry->symname = symname;
2321 __entry->len = len;
2322 __assign_str(name, name);
2323 ),
2324 TP_printk("%p %p %s %.*s", __entry->dir, __entry->dentry,
2325 __entry->symname, __entry->len, __get_str(name))
2326);
2327
2328TRACE_EVENT(ocfs2_blkno_stringify,
2329 TP_PROTO(unsigned long long blkno, const char *name, int namelen),
2330 TP_ARGS(blkno, name, namelen),
2331 TP_STRUCT__entry(
2332 __field(unsigned long long, blkno)
2333 __string(name, name)
2334 __field(int, namelen)
2335 ),
2336 TP_fast_assign(
2337 __entry->blkno = blkno;
2338 __assign_str(name, name);
2339 __entry->namelen = namelen;
2340 ),
2341 TP_printk("%llu %s %d", __entry->blkno, __get_str(name),
2342 __entry->namelen)
2343);
2344
2345DEFINE_OCFS2_ULL_EVENT(ocfs2_orphan_add_begin);
2346
2347DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_orphan_add_end);
2348
2349TRACE_EVENT(ocfs2_orphan_del,
2350 TP_PROTO(unsigned long long dir, const char *name, int namelen),
2351 TP_ARGS(dir, name, namelen),
2352 TP_STRUCT__entry(
2353 __field(unsigned long long, dir)
2354 __string(name, name)
2355 __field(int, namelen)
2356 ),
2357 TP_fast_assign(
2358 __entry->dir = dir;
2359 __assign_str(name, name);
2360 __entry->namelen = namelen;
2361 ),
2362 TP_printk("%llu %s %d", __entry->dir, __get_str(name),
2363 __entry->namelen)
2364);
2365
2366/* End of trace events for fs/ocfs2/namei.c. */
2367
2368/* Trace events for fs/ocfs2/dcache.c. */
2369
2370TRACE_EVENT(ocfs2_dentry_revalidate,
2371 TP_PROTO(void *dentry, int len, const char *name),
2372 TP_ARGS(dentry, len, name),
2373 TP_STRUCT__entry(
2374 __field(void *, dentry)
2375 __field(int, len)
2376 __string(name, name)
2377 ),
2378 TP_fast_assign(
2379 __entry->dentry = dentry;
2380 __entry->len = len;
2381 __assign_str(name, name);
2382 ),
2383 TP_printk("%p %.*s", __entry->dentry, __entry->len, __get_str(name))
2384);
2385
2386TRACE_EVENT(ocfs2_dentry_revalidate_negative,
2387 TP_PROTO(int len, const char *name, unsigned long pgen,
2388 unsigned long gen),
2389 TP_ARGS(len, name, pgen, gen),
2390 TP_STRUCT__entry(
2391 __field(int, len)
2392 __string(name, name)
2393 __field(unsigned long, pgen)
2394 __field(unsigned long, gen)
2395 ),
2396 TP_fast_assign(
2397 __entry->len = len;
2398 __assign_str(name, name);
2399 __entry->pgen = pgen;
2400 __entry->gen = gen;
2401 ),
2402 TP_printk("%.*s %lu %lu", __entry->len, __get_str(name),
2403 __entry->pgen, __entry->gen)
2404);
2405
2406DEFINE_OCFS2_ULL_EVENT(ocfs2_dentry_revalidate_delete);
2407
2408DEFINE_OCFS2_ULL_INT_EVENT(ocfs2_dentry_revalidate_orphaned);
2409
2410DEFINE_OCFS2_ULL_EVENT(ocfs2_dentry_revalidate_nofsdata);
2411
2412DEFINE_OCFS2_INT_EVENT(ocfs2_dentry_revalidate_ret);
2413
2414TRACE_EVENT(ocfs2_find_local_alias,
2415 TP_PROTO(int len, const char *name),
2416 TP_ARGS(len, name),
2417 TP_STRUCT__entry(
2418 __field(int, len)
2419 __string(name, name)
2420 ),
2421 TP_fast_assign(
2422 __entry->len = len;
2423 __assign_str(name, name);
2424 ),
2425 TP_printk("%.*s", __entry->len, __get_str(name))
2426);
2427
2428TRACE_EVENT(ocfs2_dentry_attach_lock,
2429 TP_PROTO(int len, const char *name,
2430 unsigned long long parent, void *fsdata),
2431 TP_ARGS(len, name, parent, fsdata),
2432 TP_STRUCT__entry(
2433 __field(int, len)
2434 __string(name, name)
2435 __field(unsigned long long, parent)
2436 __field(void *, fsdata)
2437 ),
2438 TP_fast_assign(
2439 __entry->len = len;
2440 __assign_str(name, name);
2441 __entry->parent = parent;
2442 __entry->fsdata = fsdata;
2443 ),
2444 TP_printk("%.*s %llu %p", __entry->len, __get_str(name),
2445 __entry->parent, __entry->fsdata)
2446);
2447
2448TRACE_EVENT(ocfs2_dentry_attach_lock_found,
2449 TP_PROTO(const char *name, unsigned long long parent,
2450 unsigned long long ino),
2451 TP_ARGS(name, parent, ino),
2452 TP_STRUCT__entry(
2453 __string(name, name)
2454 __field(unsigned long long, parent)
2455 __field(unsigned long long, ino)
2456 ),
2457 TP_fast_assign(
2458 __assign_str(name, name);
2459 __entry->parent = parent;
2460 __entry->ino = ino;
2461 ),
2462 TP_printk("%s %llu %llu", __get_str(name), __entry->parent, __entry->ino)
2463);

2464/* End of trace events for fs/ocfs2/dcache.c. */
2465
2466/* Trace events for fs/ocfs2/export.c. */
2467
2468TRACE_EVENT(ocfs2_get_dentry_begin,
2469 TP_PROTO(void *sb, void *handle, unsigned long long blkno),
2470 TP_ARGS(sb, handle, blkno),
2471 TP_STRUCT__entry(
2472 __field(void *, sb)
2473 __field(void *, handle)
2474 __field(unsigned long long, blkno)
2475 ),
2476 TP_fast_assign(
2477 __entry->sb = sb;
2478 __entry->handle = handle;
2479 __entry->blkno = blkno;
2480 ),
2481 TP_printk("%p %p %llu", __entry->sb, __entry->handle, __entry->blkno)
2482);
2483
2484DEFINE_OCFS2_INT_INT_EVENT(ocfs2_get_dentry_test_bit);
2485
2486DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_get_dentry_stale);
2487
2488DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_get_dentry_generation);
2489
2490DEFINE_OCFS2_POINTER_EVENT(ocfs2_get_dentry_end);
2491
2492TRACE_EVENT(ocfs2_get_parent,
2493 TP_PROTO(void *child, int len, const char *name,
2494 unsigned long long ino),
2495 TP_ARGS(child, len, name, ino),
2496 TP_STRUCT__entry(
2497 __field(void *, child)
2498 __field(int, len)
2499 __string(name, name)
2500 __field(unsigned long long, ino)
2501 ),
2502 TP_fast_assign(
2503 __entry->child = child;
2504 __entry->len = len;
2505 __assign_str(name, name);
2506 __entry->ino = ino;
2507 ),
2508 TP_printk("%p %.*s %llu", __entry->child, __entry->len,
2509 __get_str(name), __entry->ino)
2510);
2511
2512DEFINE_OCFS2_POINTER_EVENT(ocfs2_get_parent_end);
2513
2514TRACE_EVENT(ocfs2_encode_fh_begin,
2515 TP_PROTO(void *dentry, int name_len, const char *name,
2516 void *fh, int len, int connectable),
2517 TP_ARGS(dentry, name_len, name, fh, len, connectable),
2518 TP_STRUCT__entry(
2519 __field(void *, dentry)
2520 __field(int, name_len)
2521 __string(name, name)
2522 __field(void *, fh)
2523 __field(int, len)
2524 __field(int, connectable)
2525 ),
2526 TP_fast_assign(
2527 __entry->dentry = dentry;
2528 __entry->name_len = name_len;
2529 __assign_str(name, name);
2530 __entry->fh = fh;
2531 __entry->len = len;
2532 __entry->connectable = connectable;
2533 ),
2534 TP_printk("%p %.*s %p %d %d", __entry->dentry, __entry->name_len,
2535 __get_str(name), __entry->fh, __entry->len,
2536 __entry->connectable)
2537);
2538
2539DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_encode_fh_self);
2540
2541DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_encode_fh_parent);
2542
2543DEFINE_OCFS2_INT_EVENT(ocfs2_encode_fh_type);
2544
2545/* End of trace events for fs/ocfs2/export.c. */
2546
2547/* Trace events for fs/ocfs2/journal.c. */
2548
2549DEFINE_OCFS2_UINT_EVENT(ocfs2_commit_cache_begin);
2550
2551DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_commit_cache_end);
2552
2553DEFINE_OCFS2_INT_INT_EVENT(ocfs2_extend_trans);
2554
2555DEFINE_OCFS2_INT_EVENT(ocfs2_extend_trans_restart);
2556
2557DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(ocfs2_journal_access);
2558
2559DEFINE_OCFS2_ULL_EVENT(ocfs2_journal_dirty);
2560
2561DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_journal_init);
2562
2563DEFINE_OCFS2_UINT_EVENT(ocfs2_journal_init_maxlen);
2564
2565DEFINE_OCFS2_INT_EVENT(ocfs2_journal_shutdown);
2566
2567DEFINE_OCFS2_POINTER_EVENT(ocfs2_journal_shutdown_wait);
2568
2569DEFINE_OCFS2_ULL_EVENT(ocfs2_complete_recovery);
2570
2571DEFINE_OCFS2_INT_EVENT(ocfs2_complete_recovery_end);
2572
2573TRACE_EVENT(ocfs2_complete_recovery_slot,
2574 TP_PROTO(int slot, unsigned long long la_ino,
2575 unsigned long long tl_ino, void *qrec),
2576 TP_ARGS(slot, la_ino, tl_ino, qrec),
2577 TP_STRUCT__entry(
2578 __field(int, slot)
2579 __field(unsigned long long, la_ino)
2580 __field(unsigned long long, tl_ino)
2581 __field(void *, qrec)
2582 ),
2583 TP_fast_assign(
2584 __entry->slot = slot;
2585 __entry->la_ino = la_ino;
2586 __entry->tl_ino = tl_ino;
2587 __entry->qrec = qrec;
2588 ),
2589 TP_printk("%d %llu %llu %p", __entry->slot, __entry->la_ino,
2590 __entry->tl_ino, __entry->qrec)
2591);
2592
2593DEFINE_OCFS2_INT_INT_EVENT(ocfs2_recovery_thread_node);
2594
2595DEFINE_OCFS2_INT_EVENT(ocfs2_recovery_thread_end);
2596
2597TRACE_EVENT(ocfs2_recovery_thread,
2598 TP_PROTO(int node_num, int osb_node_num, int disable,
2599 void *recovery_thread, int map_set),
2600 TP_ARGS(node_num, osb_node_num, disable, recovery_thread, map_set),
2601 TP_STRUCT__entry(
2602 __field(int, node_num)
2603 __field(int, osb_node_num)
2604 __field(int,disable)
2605 __field(void *, recovery_thread)
2606 __field(int,map_set)
2607 ),
2608 TP_fast_assign(
2609 __entry->node_num = node_num;
2610 __entry->osb_node_num = osb_node_num;
2611 __entry->disable = disable;
2612 __entry->recovery_thread = recovery_thread;
2613 __entry->map_set = map_set;
2614 ),
2615 TP_printk("%d %d %d %p %d", __entry->node_num,
2616 __entry->osb_node_num, __entry->disable,
2617 __entry->recovery_thread, __entry->map_set)
2618);
2619
2620DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_replay_journal_recovered);
2621
2622DEFINE_OCFS2_INT_EVENT(ocfs2_replay_journal_lock_err);
2623
2624DEFINE_OCFS2_INT_EVENT(ocfs2_replay_journal_skip);
2625
2626DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_recover_node);
2627
2628DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_recover_node_skip);
2629
2630DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_mark_dead_nodes);
2631
2632DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_queue_orphan_scan_begin);
2633
2634DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_queue_orphan_scan_end);
2635
2636DEFINE_OCFS2_ULL_EVENT(ocfs2_orphan_filldir);
2637
2638DEFINE_OCFS2_INT_EVENT(ocfs2_recover_orphans);
2639
2640DEFINE_OCFS2_ULL_EVENT(ocfs2_recover_orphans_iput);
2641
2642DEFINE_OCFS2_INT_EVENT(ocfs2_wait_on_mount);
2643
2644/* End of trace events for fs/ocfs2/journal.c. */
2645
2646/* Trace events for fs/ocfs2/buffer_head_io.c. */
2647
2648DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_read_blocks_sync);
2649
2650DEFINE_OCFS2_ULL_EVENT(ocfs2_read_blocks_sync_jbd);
2651
2652DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_read_blocks_from_disk);
2653
2654DEFINE_OCFS2_ULL_INT_INT_INT_EVENT(ocfs2_read_blocks_bh);
2655
2656DEFINE_OCFS2_ULL_INT_INT_INT_EVENT(ocfs2_read_blocks_end);
2657
2658TRACE_EVENT(ocfs2_write_block,
2659 TP_PROTO(unsigned long long block, void *ci),
2660 TP_ARGS(block, ci),
2661 TP_STRUCT__entry(
2662 __field(unsigned long long, block)
2663 __field(void *, ci)
2664 ),
2665 TP_fast_assign(
2666 __entry->block = block;
2667 __entry->ci = ci;
2668 ),
2669 TP_printk("%llu %p", __entry->block, __entry->ci)
2670);
2671
2672TRACE_EVENT(ocfs2_read_blocks_begin,
2673 TP_PROTO(void *ci, unsigned long long block,
2674 unsigned int nr, int flags),
2675 TP_ARGS(ci, block, nr, flags),
2676 TP_STRUCT__entry(
2677 __field(void *, ci)
2678 __field(unsigned long long, block)
2679 __field(unsigned int, nr)
2680 __field(int, flags)
2681 ),
2682 TP_fast_assign(
2683 __entry->ci = ci;
2684 __entry->block = block;
2685 __entry->nr = nr;
2686 __entry->flags = flags;
2687 ),
2688 TP_printk("%p %llu %u %d", __entry->ci, __entry->block,
2689 __entry->nr, __entry->flags)
2690);
2691
2692/* End of trace events for fs/ocfs2/buffer_head_io.c. */
2693
2694/* Trace events for fs/ocfs2/uptodate.c. */
2695
2696DEFINE_OCFS2_ULL_EVENT(ocfs2_purge_copied_metadata_tree);
2697
2698DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_metadata_cache_purge);
2699
2700DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_buffer_cached_begin);
2701
2702TRACE_EVENT(ocfs2_buffer_cached_end,
2703 TP_PROTO(int index, void *item),
2704 TP_ARGS(index, item),
2705 TP_STRUCT__entry(
2706 __field(int, index)
2707 __field(void *, item)
2708 ),
2709 TP_fast_assign(
2710 __entry->index = index;
2711 __entry->item = item;
2712 ),
2713 TP_printk("%d %p", __entry->index, __entry->item)
2714);
2715
2716DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_append_cache_array);
2717
2718DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_insert_cache_tree);
2719
2720DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_expand_cache);
2721
2722DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_set_buffer_uptodate);
2723
2724DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_set_buffer_uptodate_begin);
2725
2726DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_remove_metadata_array);
2727
2728DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_remove_metadata_tree);
2729
2730DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(ocfs2_remove_block_from_cache);
2731
2732/* End of trace events for fs/ocfs2/uptodate.c. */
2733#endif /* _TRACE_OCFS2_H */
2734
2735/* This part must be outside protection */
2736#undef TRACE_INCLUDE_PATH
2737#define TRACE_INCLUDE_PATH .
2738#define TRACE_INCLUDE_FILE ocfs2_trace
2739#include <trace/define_trace.h>
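The DEFINE_OCFS2_*_EVENT() helpers used throughout this header are thin wrappers over the generic tracepoint machinery: one DECLARE_EVENT_CLASS() describes a record layout once, and each DEFINE_EVENT() stamps out a named event that shares it. A minimal sketch of the idiom for the single-u64 case, assuming the standard DECLARE_EVENT_CLASS/DEFINE_EVENT pattern (the class name ocfs2__ull is illustrative; the real definitions sit earlier in this header):

/* Sketch of the event-class idiom these helpers presumably follow. */
DECLARE_EVENT_CLASS(ocfs2__ull,
	TP_PROTO(unsigned long long num),
	TP_ARGS(num),
	TP_STRUCT__entry(
		__field(unsigned long long, num)
	),
	TP_fast_assign(
		__entry->num = num;
	),
	TP_printk("%llu", __entry->num)
);

/* Each named event reuses the class, so adding one costs a single line. */
#define DEFINE_OCFS2_ULL_EVENT(name)					\
DEFINE_EVENT(ocfs2__ull, name,						\
	TP_PROTO(unsigned long long num),				\
	TP_ARGS(num))

Unlike an mlog() call, the formatting in TP_printk() runs only when the trace buffer is read, not at the call site, so disabled events cost almost nothing.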
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index a73f64166481..92fcd575775a 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -11,7 +11,6 @@
 #include <linux/writeback.h>
 #include <linux/workqueue.h>
 
-#define MLOG_MASK_PREFIX ML_QUOTA
 #include <cluster/masklog.h>
 
 #include "ocfs2_fs.h"
@@ -27,6 +26,7 @@
 #include "super.h"
 #include "buffer_head_io.h"
 #include "quota.h"
+#include "ocfs2_trace.h"
 
 /*
  * Locking of quotas with OCFS2 is rather complex. Here are rules that
@@ -130,8 +130,7 @@ int ocfs2_validate_quota_block(struct super_block *sb, struct buffer_head *bh)
 	struct ocfs2_disk_dqtrailer *dqt =
 		ocfs2_block_dqtrailer(sb->s_blocksize, bh->b_data);
 
-	mlog(0, "Validating quota block %llu\n",
-	     (unsigned long long)bh->b_blocknr);
+	trace_ocfs2_validate_quota_block((unsigned long long)bh->b_blocknr);
 
 	BUG_ON(!buffer_uptodate(bh));
 
@@ -341,8 +340,6 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
 	u64 pcount;
 	int status;
 
-	mlog_entry_void();
-
 	/* Read global header */
 	gqinode = ocfs2_get_system_file_inode(OCFS2_SB(sb), ino[type],
 					      OCFS2_INVALID_SLOT);
@@ -402,7 +399,8 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
 			   msecs_to_jiffies(oinfo->dqi_syncms));
 
 out_err:
-	mlog_exit(status);
+	if (status)
+		mlog_errno(status);
 	return status;
 out_unlock:
 	ocfs2_unlock_global_qf(oinfo, 0);
@@ -508,9 +506,10 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing)
 	olditime = dquot->dq_dqb.dqb_itime;
 	oldbtime = dquot->dq_dqb.dqb_btime;
 	ocfs2_global_disk2memdqb(dquot, &dqblk);
-	mlog(0, "Syncing global dquot %u space %lld+%lld, inodes %lld+%lld\n",
-	     dquot->dq_id, dquot->dq_dqb.dqb_curspace, (long long)spacechange,
-	     dquot->dq_dqb.dqb_curinodes, (long long)inodechange);
+	trace_ocfs2_sync_dquot(dquot->dq_id, dquot->dq_dqb.dqb_curspace,
+			       (long long)spacechange,
+			       dquot->dq_dqb.dqb_curinodes,
+			       (long long)inodechange);
 	if (!test_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags))
 		dquot->dq_dqb.dqb_curspace += spacechange;
 	if (!test_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags))
@@ -557,7 +556,7 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing)
 	spin_unlock(&dq_data_lock);
 	err = ocfs2_qinfo_lock(info, freeing);
 	if (err < 0) {
-		mlog(ML_ERROR, "Failed to lock quota info, loosing quota write"
+		mlog(ML_ERROR, "Failed to lock quota info, losing quota write"
 		       " (type=%d, id=%u)\n", dquot->dq_type,
 		       (unsigned)dquot->dq_id);
 		goto out;
@@ -594,8 +593,8 @@ static int ocfs2_sync_dquot_helper(struct dquot *dquot, unsigned long type)
 	struct ocfs2_super *osb = OCFS2_SB(sb);
 	int status = 0;
 
-	mlog_entry("id=%u qtype=%u type=%lu device=%s\n", dquot->dq_id,
-		   dquot->dq_type, type, sb->s_id);
+	trace_ocfs2_sync_dquot_helper(dquot->dq_id, dquot->dq_type,
+				      type, sb->s_id);
 	if (type != dquot->dq_type)
 		goto out;
 	status = ocfs2_lock_global_qf(oinfo, 1);
@@ -621,7 +620,6 @@ static int ocfs2_sync_dquot_helper(struct dquot *dquot, unsigned long type)
 out_ilock:
 	ocfs2_unlock_global_qf(oinfo, 1);
 out:
-	mlog_exit(status);
 	return status;
 }
 
@@ -647,7 +645,7 @@ static int ocfs2_write_dquot(struct dquot *dquot)
 	struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
 	int status = 0;
 
-	mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
+	trace_ocfs2_write_dquot(dquot->dq_id, dquot->dq_type);
 
 	handle = ocfs2_start_trans(osb, OCFS2_QWRITE_CREDITS);
 	if (IS_ERR(handle)) {
@@ -660,7 +658,6 @@ static int ocfs2_write_dquot(struct dquot *dquot)
 	mutex_unlock(&sb_dqopt(dquot->dq_sb)->dqio_mutex);
 	ocfs2_commit_trans(osb, handle);
 out:
-	mlog_exit(status);
 	return status;
 }
 
@@ -686,7 +683,7 @@ static int ocfs2_release_dquot(struct dquot *dquot)
 	struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
 	int status = 0;
 
-	mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
+	trace_ocfs2_release_dquot(dquot->dq_id, dquot->dq_type);
 
 	mutex_lock(&dquot->dq_lock);
 	/* Check whether we are not racing with some other dqget() */
@@ -722,7 +719,8 @@ out_ilock:
 	ocfs2_unlock_global_qf(oinfo, 1);
 out:
 	mutex_unlock(&dquot->dq_lock);
-	mlog_exit(status);
+	if (status)
+		mlog_errno(status);
 	return status;
 }
 
@@ -743,7 +741,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
 	int need_alloc = ocfs2_global_qinit_alloc(sb, type);
 	handle_t *handle;
 
-	mlog_entry("id=%u, type=%d", dquot->dq_id, type);
+	trace_ocfs2_acquire_dquot(dquot->dq_id, type);
 	mutex_lock(&dquot->dq_lock);
 	/*
 	 * We need an exclusive lock, because we're going to update use count
@@ -809,7 +807,8 @@ out_dq:
 	set_bit(DQ_ACTIVE_B, &dquot->dq_flags);
 out:
 	mutex_unlock(&dquot->dq_lock);
-	mlog_exit(status);
+	if (status)
+		mlog_errno(status);
 	return status;
 }
 
@@ -829,7 +828,7 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
 	handle_t *handle;
 	struct ocfs2_super *osb = OCFS2_SB(sb);
 
-	mlog_entry("id=%u, type=%d", dquot->dq_id, type);
+	trace_ocfs2_mark_dquot_dirty(dquot->dq_id, type);
 
 	/* In case user set some limits, sync dquot immediately to global
 	 * quota file so that information propagates quicker */
@@ -866,7 +865,8 @@ out_dlock:
 out_ilock:
 	ocfs2_unlock_global_qf(oinfo, 1);
 out:
-	mlog_exit(status);
+	if (status)
+		mlog_errno(status);
 	return status;
 }
 
@@ -877,8 +877,6 @@ static int ocfs2_write_info(struct super_block *sb, int type)
 	int status = 0;
 	struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
 
-	mlog_entry_void();
-
 	status = ocfs2_lock_global_qf(oinfo, 1);
 	if (status < 0)
 		goto out;
@@ -893,7 +891,8 @@ static int ocfs2_write_info(struct super_block *sb, int type)
 out_ilock:
 	ocfs2_unlock_global_qf(oinfo, 1);
 out:
-	mlog_exit(status);
+	if (status)
+		mlog_errno(status);
 	return status;
 }
 
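Every trace_ocfs2_*() call introduced above is a static tracepoint, so the old compile-time MLOG_MASK_PREFIX knob is replaced by per-event runtime switches in tracefs. A user-space sketch for turning the whole group on, assuming the header declares TRACE_SYSTEM ocfs2 and that tracefs is reachable at the usual debugfs path (the path is an assumption about the mount point, not something this patch guarantees):

/* Enable all ocfs2 trace events; output then appears in
 * /sys/kernel/debug/tracing/trace. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/debug/tracing/events/ocfs2/enable", "w");

	if (!f) {
		perror("ocfs2 events");
		return 1;
	}
	fputc('1', f);		/* '1' enables, '0' disables the group */
	return fclose(f) ? 1 : 0;
}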
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index dc78764ccc4c..dc8007fc9247 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -8,7 +8,6 @@
 #include <linux/quotaops.h>
 #include <linux/module.h>
 
-#define MLOG_MASK_PREFIX ML_QUOTA
 #include <cluster/masklog.h>
 
 #include "ocfs2_fs.h"
@@ -23,6 +22,7 @@
 #include "quota.h"
 #include "uptodate.h"
 #include "super.h"
+#include "ocfs2_trace.h"
 
 /* Number of local quota structures per block */
 static inline unsigned int ol_quota_entries_per_block(struct super_block *sb)
@@ -475,7 +475,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
 	struct ocfs2_recovery_chunk *rchunk, *next;
 	qsize_t spacechange, inodechange;
 
-	mlog_entry("ino=%lu type=%u", (unsigned long)lqinode->i_ino, type);
+	trace_ocfs2_recover_local_quota_file((unsigned long)lqinode->i_ino, type);
 
 	list_for_each_entry_safe(rchunk, next, &(rec->r_list[type]), rc_list) {
 		chunk = rchunk->rc_chunk;
@@ -575,7 +575,8 @@ out_put_bh:
 	}
 	if (status < 0)
 		free_recovery_list(&(rec->r_list[type]));
-	mlog_exit(status);
+	if (status)
+		mlog_errno(status);
 	return status;
 }
 
@@ -600,7 +601,7 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
 	for (type = 0; type < MAXQUOTAS; type++) {
 		if (list_empty(&(rec->r_list[type])))
 			continue;
-		mlog(0, "Recovering quota in slot %d\n", slot_num);
+		trace_ocfs2_finish_quota_recovery(slot_num);
 		lqinode = ocfs2_get_system_file_inode(osb, ino[type], slot_num);
 		if (!lqinode) {
 			status = -ENOENT;
@@ -882,9 +883,10 @@ static void olq_set_dquot(struct buffer_head *bh, void *private)
 	dqblk->dqb_inodemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curinodes -
 					  od->dq_originodes);
 	spin_unlock(&dq_data_lock);
-	mlog(0, "Writing local dquot %u space %lld inodes %lld\n",
-	     od->dq_dquot.dq_id, (long long)le64_to_cpu(dqblk->dqb_spacemod),
-	     (long long)le64_to_cpu(dqblk->dqb_inodemod));
+	trace_olq_set_dquot(
+		(unsigned long long)le64_to_cpu(dqblk->dqb_spacemod),
+		(unsigned long long)le64_to_cpu(dqblk->dqb_inodemod),
+		od->dq_dquot.dq_id);
 }
 
 /* Write dquot to local quota file */
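The olq_set_dquot() hunk above reorders the arguments relative to the old mlog() so they line up with the new event's prototype (space delta, inode delta, then dquot id). Inferred from that call site, the tracepoint's definition plausibly looks like the sketch below; the field names are illustrative, not copied from ocfs2_trace.h:

TRACE_EVENT(olq_set_dquot,
	TP_PROTO(unsigned long long dbmod, unsigned long long ibmod,
		 unsigned long long dq_id),
	TP_ARGS(dbmod, ibmod, dq_id),
	TP_STRUCT__entry(
		__field(unsigned long long, dbmod)
		__field(unsigned long long, ibmod)
		__field(unsigned long long, dq_id)
	),
	TP_fast_assign(
		__entry->dbmod = dbmod;
		__entry->ibmod = ibmod;
		__entry->dq_id = dq_id;
	),
	TP_printk("%llu %llu %llu",
		  __entry->dbmod, __entry->ibmod, __entry->dq_id)
);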
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index c4feceda2d96..3c7606cff1ab 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -16,7 +16,6 @@
  */
 
 #include <linux/sort.h>
-#define MLOG_MASK_PREFIX ML_REFCOUNT
 #include <cluster/masklog.h>
 #include "ocfs2.h"
 #include "inode.h"
@@ -34,6 +33,7 @@
 #include "aops.h"
 #include "xattr.h"
 #include "namei.h"
+#include "ocfs2_trace.h"
 
 #include <linux/bio.h>
 #include <linux/blkdev.h>
@@ -84,8 +84,7 @@ static int ocfs2_validate_refcount_block(struct super_block *sb,
 	struct ocfs2_refcount_block *rb =
 		(struct ocfs2_refcount_block *)bh->b_data;
 
-	mlog(0, "Validating refcount block %llu\n",
-	     (unsigned long long)bh->b_blocknr);
+	trace_ocfs2_validate_refcount_block((unsigned long long)bh->b_blocknr);
 
 	BUG_ON(!buffer_uptodate(bh));
 
@@ -545,8 +544,8 @@ void ocfs2_purge_refcount_trees(struct ocfs2_super *osb)
 	while ((node = rb_last(root)) != NULL) {
 		tree = rb_entry(node, struct ocfs2_refcount_tree, rf_node);
 
-		mlog(0, "Purge tree %llu\n",
-		     (unsigned long long) tree->rf_blkno);
+		trace_ocfs2_purge_refcount_trees(
+				(unsigned long long) tree->rf_blkno);
 
 		rb_erase(&tree->rf_node, root);
 		ocfs2_free_refcount_tree(tree);
@@ -575,7 +574,8 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
 
 	BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);
 
-	mlog(0, "create tree for inode %lu\n", inode->i_ino);
+	trace_ocfs2_create_refcount_tree(
+		(unsigned long long)OCFS2_I(inode)->ip_blkno);
 
 	ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
 	if (ret) {
@@ -646,8 +646,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
 	di->i_refcount_loc = cpu_to_le64(first_blkno);
 	spin_unlock(&oi->ip_lock);
 
-	mlog(0, "created tree for inode %lu, refblock %llu\n",
-	     inode->i_ino, (unsigned long long)first_blkno);
+	trace_ocfs2_create_refcount_tree_blkno((unsigned long long)first_blkno);
 
 	ocfs2_journal_dirty(handle, di_bh);
 
@@ -1256,8 +1255,9 @@ static int ocfs2_change_refcount_rec(handle_t *handle,
 		goto out;
 	}
 
-	mlog(0, "change index %d, old count %u, change %d\n", index,
-	     le32_to_cpu(rec->r_refcount), change);
+	trace_ocfs2_change_refcount_rec(
+		(unsigned long long)ocfs2_metadata_cache_owner(ci),
+		index, le32_to_cpu(rec->r_refcount), change);
 	le32_add_cpu(&rec->r_refcount, change);
 
 	if (!rec->r_refcount) {
@@ -1353,8 +1353,8 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle,
 
 	ocfs2_journal_dirty(handle, ref_root_bh);
 
-	mlog(0, "new leaf block %llu, used %u\n", (unsigned long long)blkno,
-	     le16_to_cpu(new_rb->rf_records.rl_used));
+	trace_ocfs2_expand_inline_ref_root((unsigned long long)blkno,
+		le16_to_cpu(new_rb->rf_records.rl_used));
 
 	*ref_leaf_bh = new_bh;
 	new_bh = NULL;
@@ -1466,9 +1466,9 @@ static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh,
 		(struct ocfs2_refcount_block *)new_bh->b_data;
 	struct ocfs2_refcount_list *new_rl = &new_rb->rf_records;
 
-	mlog(0, "split old leaf refcount block %llu, count = %u, used = %u\n",
-	     (unsigned long long)ref_leaf_bh->b_blocknr,
-	     le32_to_cpu(rl->rl_count), le32_to_cpu(rl->rl_used));
+	trace_ocfs2_divide_leaf_refcount_block(
+		(unsigned long long)ref_leaf_bh->b_blocknr,
+		le32_to_cpu(rl->rl_count), le32_to_cpu(rl->rl_used));
 
 	/*
 	 * XXX: Improvement later.
@@ -1601,8 +1601,8 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle,
 
 	ocfs2_init_refcount_extent_tree(&ref_et, ci, ref_root_bh);
 
-	mlog(0, "insert new leaf block %llu at %u\n",
-	     (unsigned long long)new_bh->b_blocknr, new_cpos);
+	trace_ocfs2_new_leaf_refcount_block(
+			(unsigned long long)new_bh->b_blocknr, new_cpos);
 
 	/* Insert the new leaf block with the specific offset cpos. */
 	ret = ocfs2_insert_extent(handle, &ref_et, new_cpos, new_bh->b_blocknr,
@@ -1794,11 +1794,10 @@ static int ocfs2_insert_refcount_rec(handle_t *handle,
 			(le16_to_cpu(rf_list->rl_used) - index) *
 			 sizeof(struct ocfs2_refcount_rec));
 
-	mlog(0, "insert refcount record start %llu, len %u, count %u "
-	     "to leaf block %llu at index %d\n",
-	     (unsigned long long)le64_to_cpu(rec->r_cpos),
-	     le32_to_cpu(rec->r_clusters), le32_to_cpu(rec->r_refcount),
-	     (unsigned long long)ref_leaf_bh->b_blocknr, index);
+	trace_ocfs2_insert_refcount_rec(
+		(unsigned long long)ref_leaf_bh->b_blocknr, index,
+		(unsigned long long)le64_to_cpu(rec->r_cpos),
+		le32_to_cpu(rec->r_clusters), le32_to_cpu(rec->r_refcount));
 
 	rf_list->rl_recs[index] = *rec;
 
@@ -1850,10 +1849,12 @@ static int ocfs2_split_refcount_rec(handle_t *handle,
 
 	BUG_ON(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL);
 
-	mlog(0, "original r_pos %llu, cluster %u, split %llu, cluster %u\n",
-	     le64_to_cpu(orig_rec->r_cpos), le32_to_cpu(orig_rec->r_clusters),
-	     le64_to_cpu(split_rec->r_cpos),
-	     le32_to_cpu(split_rec->r_clusters));
+	trace_ocfs2_split_refcount_rec(le64_to_cpu(orig_rec->r_cpos),
+		le32_to_cpu(orig_rec->r_clusters),
+		le32_to_cpu(orig_rec->r_refcount),
+		le64_to_cpu(split_rec->r_cpos),
+		le32_to_cpu(split_rec->r_clusters),
+		le32_to_cpu(split_rec->r_refcount));
 
 	/*
 	 * If we just need to split the header or tail clusters,
@@ -1967,12 +1968,11 @@ static int ocfs2_split_refcount_rec(handle_t *handle,
 
 	if (split_rec->r_refcount) {
 		rf_list->rl_recs[index] = *split_rec;
-		mlog(0, "insert refcount record start %llu, len %u, count %u "
-		     "to leaf block %llu at index %d\n",
-		     (unsigned long long)le64_to_cpu(split_rec->r_cpos),
-		     le32_to_cpu(split_rec->r_clusters),
-		     le32_to_cpu(split_rec->r_refcount),
-		     (unsigned long long)ref_leaf_bh->b_blocknr, index);
+		trace_ocfs2_split_refcount_rec_insert(
+			(unsigned long long)ref_leaf_bh->b_blocknr, index,
+			(unsigned long long)le64_to_cpu(split_rec->r_cpos),
+			le32_to_cpu(split_rec->r_clusters),
+			le32_to_cpu(split_rec->r_refcount));
 
 		if (merge)
 			ocfs2_refcount_rec_merge(rb, index);
@@ -1997,7 +1997,7 @@ static int __ocfs2_increase_refcount(handle_t *handle,
 	struct ocfs2_refcount_rec rec;
 	unsigned int set_len = 0;
 
-	mlog(0, "Tree owner %llu, add refcount start %llu, len %u\n",
+	trace_ocfs2_increase_refcount_begin(
 	     (unsigned long long)ocfs2_metadata_cache_owner(ci),
 	     (unsigned long long)cpos, len);
 
@@ -2024,9 +2024,9 @@ static int __ocfs2_increase_refcount(handle_t *handle,
 		 */
 		if (rec.r_refcount && le64_to_cpu(rec.r_cpos) == cpos &&
 		    set_len <= len) {
-			mlog(0, "increase refcount rec, start %llu, len %u, "
-			     "count %u\n", (unsigned long long)cpos, set_len,
-			     le32_to_cpu(rec.r_refcount));
+			trace_ocfs2_increase_refcount_change(
+				(unsigned long long)cpos, set_len,
+				le32_to_cpu(rec.r_refcount));
 			ret = ocfs2_change_refcount_rec(handle, ci,
 							ref_leaf_bh, index,
 							merge, 1);
@@ -2037,7 +2037,7 @@ static int __ocfs2_increase_refcount(handle_t *handle,
 		} else if (!rec.r_refcount) {
 			rec.r_refcount = cpu_to_le32(1);
 
-			mlog(0, "insert refcount rec, start %llu, len %u\n",
+			trace_ocfs2_increase_refcount_insert(
 			     (unsigned long long)le64_to_cpu(rec.r_cpos),
 			     set_len);
 			ret = ocfs2_insert_refcount_rec(handle, ci, ref_root_bh,
@@ -2055,8 +2055,7 @@ static int __ocfs2_increase_refcount(handle_t *handle,
 			rec.r_clusters = cpu_to_le32(set_len);
 			le32_add_cpu(&rec.r_refcount, 1);
 
-			mlog(0, "split refcount rec, start %llu, "
-			     "len %u, count %u\n",
+			trace_ocfs2_increase_refcount_split(
 			     (unsigned long long)le64_to_cpu(rec.r_cpos),
 			     set_len, le32_to_cpu(rec.r_refcount));
 			ret = ocfs2_split_refcount_rec(handle, ci,
@@ -2095,6 +2094,11 @@ static int ocfs2_remove_refcount_extent(handle_t *handle,
 
 	BUG_ON(rb->rf_records.rl_used);
 
+	trace_ocfs2_remove_refcount_extent(
+		(unsigned long long)ocfs2_metadata_cache_owner(ci),
+		(unsigned long long)ref_leaf_bh->b_blocknr,
+		le32_to_cpu(rb->rf_cpos));
+
 	ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
 	ret = ocfs2_remove_extent(handle, &et, le32_to_cpu(rb->rf_cpos),
 				  1, meta_ac, dealloc);
@@ -2137,7 +2141,7 @@ static int ocfs2_remove_refcount_extent(handle_t *handle,
 	if (!rb->rf_list.l_next_free_rec) {
 		BUG_ON(rb->rf_clusters);
 
-		mlog(0, "reset refcount tree root %llu to be a record block.\n",
+		trace_ocfs2_restore_refcount_block(
 		     (unsigned long long)ref_root_bh->b_blocknr);
 
 		rb->rf_flags = 0;
@@ -2184,6 +2188,10 @@ static int ocfs2_decrease_refcount_rec(handle_t *handle,
 	BUG_ON(cpos + len >
 	       le64_to_cpu(rec->r_cpos) + le32_to_cpu(rec->r_clusters));
 
+	trace_ocfs2_decrease_refcount_rec(
+		(unsigned long long)ocfs2_metadata_cache_owner(ci),
+		(unsigned long long)cpos, len);
+
 	if (cpos == le64_to_cpu(rec->r_cpos) &&
 	    len == le32_to_cpu(rec->r_clusters))
 		ret = ocfs2_change_refcount_rec(handle, ci,
@@ -2195,12 +2203,6 @@ static int ocfs2_decrease_refcount_rec(handle_t *handle,
 
 		le32_add_cpu(&split.r_refcount, -1);
 
-		mlog(0, "split refcount rec, start %llu, "
-		     "len %u, count %u, original start %llu, len %u\n",
-		     (unsigned long long)le64_to_cpu(split.r_cpos),
-		     len, le32_to_cpu(split.r_refcount),
-		     (unsigned long long)le64_to_cpu(rec->r_cpos),
-		     le32_to_cpu(rec->r_clusters));
 		ret = ocfs2_split_refcount_rec(handle, ci,
 					       ref_root_bh, ref_leaf_bh,
 					       &split, index, 1,
@@ -2239,10 +2241,9 @@ static int __ocfs2_decrease_refcount(handle_t *handle,
 	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
 	struct buffer_head *ref_leaf_bh = NULL;
 
-	mlog(0, "Tree owner %llu, decrease refcount start %llu, "
-	     "len %u, delete %u\n",
-	     (unsigned long long)ocfs2_metadata_cache_owner(ci),
-	     (unsigned long long)cpos, len, delete);
+	trace_ocfs2_decrease_refcount(
+		(unsigned long long)ocfs2_metadata_cache_owner(ci),
+		(unsigned long long)cpos, len, delete);
 
 	while (len) {
 		ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
@@ -2352,8 +2353,8 @@ static int ocfs2_mark_extent_refcounted(struct inode *inode,
 {
 	int ret;
 
-	mlog(0, "Inode %lu refcount tree cpos %u, len %u, phys cluster %u\n",
-	     inode->i_ino, cpos, len, phys);
+	trace_ocfs2_mark_extent_refcounted(OCFS2_I(inode)->ip_blkno,
+					   cpos, len, phys);
 
 	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
 		ocfs2_error(inode->i_sb, "Inode %lu want to use refcount "
@@ -2392,8 +2393,6 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
 	struct buffer_head *ref_leaf_bh = NULL, *prev_bh = NULL;
 	u32 len;
 
-	mlog(0, "start_cpos %llu, clusters %u\n",
-	     (unsigned long long)start_cpos, clusters);
 	while (clusters) {
 		ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
 					     cpos, clusters, &rec,
@@ -2427,12 +2426,11 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
 
 		rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
 
-		mlog(0, "recs_add %d,cpos %llu, clusters %u, rec->r_cpos %llu,"
-		     "rec->r_clusters %u, rec->r_refcount %u, index %d\n",
-		     recs_add, (unsigned long long)cpos, clusters,
-		     (unsigned long long)le64_to_cpu(rec.r_cpos),
-		     le32_to_cpu(rec.r_clusters),
-		     le32_to_cpu(rec.r_refcount), index);
+		trace_ocfs2_calc_refcount_meta_credits_iterate(
+				recs_add, (unsigned long long)cpos, clusters,
+				(unsigned long long)le64_to_cpu(rec.r_cpos),
+				le32_to_cpu(rec.r_clusters),
+				le32_to_cpu(rec.r_refcount), index);
 
 		len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) +
 			  le32_to_cpu(rec.r_clusters)) - cpos;
@@ -2488,7 +2486,6 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
 	if (!ref_blocks)
 		goto out;
 
-	mlog(0, "we need ref_blocks %d\n", ref_blocks);
 	*meta_add += ref_blocks;
 	*credits += ref_blocks;
 
@@ -2514,6 +2511,10 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
 	}
 
 out:
+
+	trace_ocfs2_calc_refcount_meta_credits(
+		(unsigned long long)start_cpos, clusters,
+		*meta_add, *credits);
 	brelse(ref_leaf_bh);
 	brelse(prev_bh);
 	return ret;
@@ -2578,8 +2579,7 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
 		goto out;
 	}
 
-	mlog(0, "reserve new metadata %d blocks, credits = %d\n",
-	     *ref_blocks, *credits);
+	trace_ocfs2_prepare_refcount_change_for_del(*ref_blocks, *credits);
 
 out:
 	brelse(ref_root_bh);
@@ -2886,8 +2886,7 @@ static int ocfs2_lock_refcount_allocators(struct super_block *sb,
 		goto out;
 	}
 
-	mlog(0, "reserve new metadata %d, clusters %u, credits = %d\n",
-	     meta_add, num_clusters, *credits);
+	trace_ocfs2_lock_refcount_allocators(meta_add, *credits);
 	ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(sb), meta_add,
 						meta_ac);
 	if (ret) {
@@ -2937,8 +2936,8 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
 	loff_t offset, end, map_end;
 	struct address_space *mapping = context->inode->i_mapping;
 
-	mlog(0, "old_cluster %u, new %u, len %u at offset %u\n", old_cluster,
-	     new_cluster, new_len, cpos);
+	trace_ocfs2_duplicate_clusters_by_page(cpos, old_cluster,
+					       new_cluster, new_len);
 
 	readahead_pages =
 		(ocfs2_cow_contig_clusters(sb) <<
@@ -3031,8 +3030,8 @@ static int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
 	struct buffer_head *old_bh = NULL;
 	struct buffer_head *new_bh = NULL;
 
-	mlog(0, "old_cluster %u, new %u, len %u\n", old_cluster,
-	     new_cluster, new_len);
+	trace_ocfs2_duplicate_clusters_by_page(cpos, old_cluster,
+					       new_cluster, new_len);
 
 	for (i = 0; i < blocks; i++, old_block++, new_block++) {
 		new_bh = sb_getblk(osb->sb, new_block);
@@ -3085,8 +3084,8 @@ static int ocfs2_clear_ext_refcount(handle_t *handle,
 	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
 	u64 ino = ocfs2_metadata_cache_owner(et->et_ci);
 
-	mlog(0, "inode %llu cpos %u, len %u, p_cluster %u, ext_flags %u\n",
-	     (unsigned long long)ino, cpos, len, p_cluster, ext_flags);
+	trace_ocfs2_clear_ext_refcount((unsigned long long)ino,
+				       cpos, len, p_cluster, ext_flags);
 
 	memset(&replace_rec, 0, sizeof(replace_rec));
 	replace_rec.e_cpos = cpu_to_le32(cpos);
@@ -3141,8 +3140,8 @@ static int ocfs2_replace_clusters(handle_t *handle,
 	struct ocfs2_caching_info *ci = context->data_et.et_ci;
 	u64 ino = ocfs2_metadata_cache_owner(ci);
 
-	mlog(0, "inode %llu, cpos %u, old %u, new %u, len %u, ext_flags %u\n",
-	     (unsigned long long)ino, cpos, old, new, len, ext_flags);
+	trace_ocfs2_replace_clusters((unsigned long long)ino,
+				     cpos, old, new, len, ext_flags);
 
 	/*If the old clusters is unwritten, no need to duplicate. */
 	if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) {
@@ -3236,8 +3235,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
 	struct ocfs2_caching_info *ref_ci = &context->ref_tree->rf_ci;
 	struct ocfs2_refcount_rec rec;
 
-	mlog(0, "cpos %u, p_cluster %u, num_clusters %u, e_flags %u\n",
-	     cpos, p_cluster, num_clusters, e_flags);
+	trace_ocfs2_make_clusters_writable(cpos, p_cluster,
+					   num_clusters, e_flags);
 
 	ret = ocfs2_lock_refcount_allocators(sb, p_cluster, num_clusters,
 					     &context->data_et,
@@ -3475,9 +3474,9 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
 		goto out;
 	}
 
-	mlog(0, "CoW inode %lu, cpos %u, write_len %u, cow_start %u, "
-	     "cow_len %u\n", inode->i_ino,
-	     cpos, write_len, cow_start, cow_len);
+	trace_ocfs2_refcount_cow_hunk(OCFS2_I(inode)->ip_blkno,
+				      cpos, write_len, max_cpos,
+				      cow_start, cow_len);
 
 	BUG_ON(cow_len == 0);
 
@@ -3756,8 +3755,7 @@ int ocfs2_add_refcount_flag(struct inode *inode,
 		goto out;
 	}
 
-	mlog(0, "reserve new metadata %d, credits = %d\n",
-	     ref_blocks, credits);
+	trace_ocfs2_add_refcount_flag(ref_blocks, credits);
 
 	if (ref_blocks) {
 		ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c
index 3e78db361bc7..41ffd36c689c 100644
--- a/fs/ocfs2/reservations.c
+++ b/fs/ocfs2/reservations.c
@@ -30,10 +30,10 @@
 #include <linux/bitops.h>
 #include <linux/list.h>
 
-#define MLOG_MASK_PREFIX ML_RESERVATIONS
 #include <cluster/masklog.h>
 
 #include "ocfs2.h"
+#include "ocfs2_trace.h"
 
 #ifdef CONFIG_OCFS2_DEBUG_FS
 #define OCFS2_CHECK_RESERVATIONS
@@ -321,8 +321,7 @@ static void ocfs2_resv_insert(struct ocfs2_reservation_map *resmap,
 
 	assert_spin_locked(&resv_lock);
 
-	mlog(0, "Insert reservation start: %u len: %u\n", new->r_start,
-	     new->r_len);
+	trace_ocfs2_resv_insert(new->r_start, new->r_len);
 
 	while (*p) {
 		parent = *p;
@@ -423,8 +422,8 @@ static int ocfs2_resmap_find_free_bits(struct ocfs2_reservation_map *resmap,
 	unsigned int best_start, best_len = 0;
 	int offset, start, found;
 
-	mlog(0, "Find %u bits within range (%u, len %u) resmap len: %u\n",
-	     wanted, search_start, search_len, resmap->m_bitmap_len);
+	trace_ocfs2_resmap_find_free_bits_begin(search_start, search_len,
+						wanted, resmap->m_bitmap_len);
 
 	found = best_start = best_len = 0;
 
@@ -463,7 +462,7 @@ static int ocfs2_resmap_find_free_bits(struct ocfs2_reservation_map *resmap,
 	*rlen = best_len;
 	*rstart = best_start;
 
-	mlog(0, "Found start: %u len: %u\n", best_start, best_len);
+	trace_ocfs2_resmap_find_free_bits_end(best_start, best_len);
 
 	return *rlen;
 }
@@ -487,9 +486,8 @@ static void __ocfs2_resv_find_window(struct ocfs2_reservation_map *resmap,
 	 *   - our window should be last in all reservations
 	 *   - need to make sure we don't go past end of bitmap
 	 */
-
-	mlog(0, "resv start: %u resv end: %u goal: %u wanted: %u\n",
-	     resv->r_start, ocfs2_resv_end(resv), goal, wanted);
+	trace_ocfs2_resv_find_window_begin(resv->r_start, ocfs2_resv_end(resv),
+					   goal, wanted, RB_EMPTY_ROOT(root));
 
 	assert_spin_locked(&resv_lock);
 
@@ -498,9 +496,6 @@ static void __ocfs2_resv_find_window(struct ocfs2_reservation_map *resmap,
 		 * Easiest case - empty tree. We can just take
 		 * whatever window of free bits we want.
 		 */
-
-		mlog(0, "Empty root\n");
-
 		clen = ocfs2_resmap_find_free_bits(resmap, wanted, goal,
 						   resmap->m_bitmap_len - goal,
 						   &cstart, &clen);
@@ -524,8 +519,6 @@ static void __ocfs2_resv_find_window(struct ocfs2_reservation_map *resmap,
 	prev_resv = ocfs2_find_resv_lhs(resmap, goal);
 
 	if (prev_resv == NULL) {
-		mlog(0, "Goal on LHS of leftmost window\n");
-
 		/*
 		 * A NULL here means that the search code couldn't
 		 * find a window that starts before goal.
@@ -570,13 +563,15 @@ static void __ocfs2_resv_find_window(struct ocfs2_reservation_map *resmap,
 		next_resv = NULL;
 	}
 
+	trace_ocfs2_resv_find_window_prev(prev_resv->r_start,
+					  ocfs2_resv_end(prev_resv));
+
 	prev = &prev_resv->r_node;
 
 	/* Now we do a linear search for a window, starting at 'prev_rsv' */
 	while (1) {
 		next = rb_next(prev);
 		if (next) {
-			mlog(0, "One more resv found in linear search\n");
 			next_resv = rb_entry(next,
 					     struct ocfs2_alloc_reservation,
 					     r_node);
@@ -585,7 +580,6 @@ static void __ocfs2_resv_find_window(struct ocfs2_reservation_map *resmap,
 			gap_end = next_resv->r_start - 1;
 			gap_len = gap_end - gap_start + 1;
 		} else {
-			mlog(0, "No next node\n");
 			/*
 			 * We're at the rightmost edge of the
 			 * tree. See if a reservation between this
@@ -596,6 +590,8 @@ static void __ocfs2_resv_find_window(struct ocfs2_reservation_map *resmap,
 			gap_end = resmap->m_bitmap_len - 1;
 		}
 
+		trace_ocfs2_resv_find_window_next(next ? next_resv->r_start : -1,
+						  next ? ocfs2_resv_end(next_resv) : -1);
 		/*
 		 * No need to check this gap if we have already found
 		 * a larger region of free bits.
@@ -654,8 +650,9 @@ static void ocfs2_cannibalize_resv(struct ocfs2_reservation_map *resmap,
 	lru_resv = list_first_entry(&resmap->m_lru,
 				    struct ocfs2_alloc_reservation, r_lru);
 
-	mlog(0, "lru resv: start: %u len: %u end: %u\n", lru_resv->r_start,
-	     lru_resv->r_len, ocfs2_resv_end(lru_resv));
+	trace_ocfs2_cannibalize_resv_begin(lru_resv->r_start,
+					   lru_resv->r_len,
+					   ocfs2_resv_end(lru_resv));
 
 	/*
 	 * Cannibalize (some or all) of the target reservation and
@@ -684,10 +681,9 @@ static void ocfs2_cannibalize_resv(struct ocfs2_reservation_map *resmap,
 		resv->r_len = shrink;
 	}
 
-	mlog(0, "Reservation now looks like: r_start: %u r_end: %u "
-	     "r_len: %u r_last_start: %u r_last_len: %u\n",
-	     resv->r_start, ocfs2_resv_end(resv), resv->r_len,
-	     resv->r_last_start, resv->r_last_len);
+	trace_ocfs2_cannibalize_resv_end(resv->r_start, ocfs2_resv_end(resv),
+					 resv->r_len, resv->r_last_start,
+					 resv->r_last_len);
 
 	ocfs2_resv_insert(resmap, resv);
 }
@@ -748,7 +744,6 @@ int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap,
 		if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen)
 			wanted = *clen;
 
-		mlog(0, "empty reservation, find new window\n");
 		/*
 		 * Try to get a window here. If it works, we must fall
 		 * through and test the bitmap . This avoids some
@@ -757,6 +752,7 @@ int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap,
 		 * that inode.
 		 */
 		ocfs2_resv_find_window(resmap, resv, wanted);
+		trace_ocfs2_resmap_resv_bits(resv->r_start, resv->r_len);
 	}
 
 	BUG_ON(ocfs2_resv_empty(resv));
@@ -813,10 +809,10 @@ void ocfs2_resmap_claimed_bits(struct ocfs2_reservation_map *resmap,
 
 	spin_lock(&resv_lock);
 
-	mlog(0, "claim bits: cstart: %u cend: %u clen: %u r_start: %u "
-	     "r_end: %u r_len: %u, r_last_start: %u r_last_len: %u\n",
-	     cstart, cend, clen, resv->r_start, ocfs2_resv_end(resv),
-	     resv->r_len, resv->r_last_start, resv->r_last_len);
+	trace_ocfs2_resmap_claimed_bits_begin(cstart, cend, clen, resv->r_start,
+					      ocfs2_resv_end(resv), resv->r_len,
+					      resv->r_last_start,
+					      resv->r_last_len);
 
 	BUG_ON(cstart < resv->r_start);
 	BUG_ON(cstart > ocfs2_resv_end(resv));
@@ -833,10 +829,9 @@ void ocfs2_resmap_claimed_bits(struct ocfs2_reservation_map *resmap,
 	if (!ocfs2_resv_empty(resv))
 		ocfs2_resv_mark_lru(resmap, resv);
 
-	mlog(0, "Reservation now looks like: r_start: %u r_end: %u "
-	     "r_len: %u r_last_start: %u r_last_len: %u\n",
-	     resv->r_start, ocfs2_resv_end(resv), resv->r_len,
-	     resv->r_last_start, resv->r_last_len);
+	trace_ocfs2_resmap_claimed_bits_end(resv->r_start, ocfs2_resv_end(resv),
+					    resv->r_len, resv->r_last_start,
+					    resv->r_last_len);
 
 	ocfs2_check_resmap(resmap);
 
diff --git a/fs/ocfs2/reservations.h b/fs/ocfs2/reservations.h
index 1e49cc29d06c..42c2b804f3fd 100644
--- a/fs/ocfs2/reservations.h
+++ b/fs/ocfs2/reservations.h
@@ -29,7 +29,7 @@
 struct ocfs2_alloc_reservation {
 	struct rb_node	r_node;
 
-	unsigned int	r_start;	/* Begining of current window */
+	unsigned int	r_start;	/* Beginning of current window */
 	unsigned int	r_len;		/* Length of the window */
 
 	unsigned int	r_last_len;	/* Length of most recent alloc */
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index dacd553d8617..ec55add7604a 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -27,7 +27,6 @@
 #include <linux/fs.h>
 #include <linux/types.h>
 
-#define MLOG_MASK_PREFIX ML_DISK_ALLOC
 #include <cluster/masklog.h>
 
 #include "ocfs2.h"
@@ -39,6 +38,7 @@
 #include "super.h"
 #include "sysfile.h"
 #include "uptodate.h"
+#include "ocfs2_trace.h"
 
 #include "buffer_head_io.h"
 #include "suballoc.h"
@@ -82,7 +82,6 @@ static u16 ocfs2_calc_new_backup_super(struct inode *inode,
 		backups++;
 	}
 
-	mlog_exit_void();
 	return backups;
 }
 
@@ -103,8 +102,8 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
 	u16 cl_bpc = le16_to_cpu(cl->cl_bpc);
 	u16 cl_cpg = le16_to_cpu(cl->cl_cpg);
 
-	mlog_entry("(new_clusters=%d, first_new_cluster = %u)\n",
-		   new_clusters, first_new_cluster);
+	trace_ocfs2_update_last_group_and_inode(new_clusters,
+						first_new_cluster);
 
 	ret = ocfs2_journal_access_gd(handle, INODE_CACHE(bm_inode),
 				      group_bh, OCFS2_JOURNAL_ACCESS_WRITE);
@@ -176,7 +175,8 @@ out_rollback:
 		le16_add_cpu(&group->bg_free_bits_count, -1 * num_bits);
 	}
 out:
-	mlog_exit(ret);
+	if (ret)
+		mlog_errno(ret);
 	return ret;
 }
 
@@ -281,8 +281,6 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
 	u32 first_new_cluster;
 	u64 lgd_blkno;
 
-	mlog_entry_void();
-
 	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
 		return -EROFS;
 
@@ -342,7 +340,8 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
 		goto out_unlock;
 	}
 
-	mlog(0, "extend the last group at %llu, new clusters = %d\n",
+
+	trace_ocfs2_group_extend(
 	     (unsigned long long)le64_to_cpu(group->bg_blkno), new_clusters);
 
 	handle = ocfs2_start_trans(osb, OCFS2_GROUP_EXTEND_CREDITS);
@@ -377,7 +376,6 @@ out_mutex:
 	iput(main_bm_inode);
 
 out:
-	mlog_exit_void();
 	return ret;
 }
 
@@ -472,8 +470,6 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
 	struct ocfs2_chain_rec *cr;
 	u16 cl_bpc;
 
-	mlog_entry_void();
-
 	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
 		return -EROFS;
 
@@ -520,8 +516,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
 		goto out_unlock;
 	}
 
-	mlog(0, "Add a new group %llu in chain = %u, length = %u\n",
-	     (unsigned long long)input->group, input->chain, input->clusters);
+	trace_ocfs2_group_add((unsigned long long)input->group,
+			      input->chain, input->clusters, input->frees);
 
 	handle = ocfs2_start_trans(osb, OCFS2_GROUP_ADD_CREDITS);
 	if (IS_ERR(handle)) {
@@ -589,6 +585,5 @@ out_mutex:
 	iput(main_bm_inode);
 
 out:
-	mlog_exit_void();
 	return ret;
 }
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index ab4e0172cc1d..26fc0014d509 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -27,7 +27,6 @@
 #include <linux/slab.h>
 #include <linux/highmem.h>
 
-#define MLOG_MASK_PREFIX ML_SUPER
 #include <cluster/masklog.h>
 
 #include "ocfs2.h"
@@ -39,6 +38,7 @@
 #include "slot_map.h"
 #include "super.h"
 #include "sysfile.h"
+#include "ocfs2_trace.h"
 
 #include "buffer_head_io.h"
 
@@ -142,8 +142,7 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
 	BUG_ON(si->si_blocks == 0);
 	BUG_ON(si->si_bh == NULL);
 
-	mlog(0, "Refreshing slot map, reading %u block(s)\n",
-	     si->si_blocks);
+	trace_ocfs2_refresh_slot_info(si->si_blocks);
 
 	/*
 	 * We pass -1 as blocknr because we expect all of si->si_bh to
@@ -381,8 +380,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
 	/* The size checks above should ensure this */
 	BUG_ON((osb->max_slots / si->si_slots_per_block) > blocks);
 
-	mlog(0, "Slot map needs %u buffers for %llu bytes\n",
-	     si->si_blocks, bytes);
+	trace_ocfs2_map_slot_buffers(bytes, si->si_blocks);
 
 	si->si_bh = kzalloc(sizeof(struct buffer_head *) * si->si_blocks,
 			    GFP_KERNEL);
@@ -400,8 +398,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
 			goto bail;
 		}
 
-		mlog(0, "Reading slot map block %u at %llu\n", i,
-		     (unsigned long long)blkno);
+		trace_ocfs2_map_slot_buffers_block((unsigned long long)blkno, i);
 
 		bh = NULL; /* Acquire a fresh bh */
 		status = ocfs2_read_blocks(INODE_CACHE(si->si_inode), blkno,
@@ -475,8 +472,6 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
 	int slot;
 	struct ocfs2_slot_info *si;
 
-	mlog_entry_void();
-
 	si = osb->slot_info;
 
 	spin_lock(&osb->osb_lock);
@@ -505,14 +500,13 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
 	osb->slot_num = slot;
 	spin_unlock(&osb->osb_lock);
 
-	mlog(0, "taking node slot %d\n", osb->slot_num);
+	trace_ocfs2_find_slot(osb->slot_num);
 
 	status = ocfs2_update_disk_slot(osb, si, osb->slot_num);
 	if (status < 0)
 		mlog_errno(status);
 
 bail:
-	mlog_exit(status);
 	return status;
 }
 
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
index 8ce7398ae1d2..1ec56fdb8d0d 100644
--- a/fs/ocfs2/stackglue.h
+++ b/fs/ocfs2/stackglue.h
@@ -126,7 +126,7 @@ struct ocfs2_stack_operations {
126 * 126 *
127 * ->connect() must not return until it is guaranteed that 127 * ->connect() must not return until it is guaranteed that
128 * 128 *
129 * - Node down notifications for the filesystem will be recieved 129 * - Node down notifications for the filesystem will be received
130 * and passed to conn->cc_recovery_handler(). 130 * and passed to conn->cc_recovery_handler().
131 * - Locking requests for the filesystem will be processed. 131 * - Locking requests for the filesystem will be processed.
132 */ 132 */
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 71998d4d61d5..ba5d97e4a73e 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -29,7 +29,6 @@
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include <linux/highmem.h> 30#include <linux/highmem.h>
31 31
32#define MLOG_MASK_PREFIX ML_DISK_ALLOC
33#include <cluster/masklog.h> 32#include <cluster/masklog.h>
34 33
35#include "ocfs2.h" 34#include "ocfs2.h"
@@ -44,6 +43,7 @@
44#include "super.h" 43#include "super.h"
45#include "sysfile.h" 44#include "sysfile.h"
46#include "uptodate.h" 45#include "uptodate.h"
46#include "ocfs2_trace.h"
47 47
48#include "buffer_head_io.h" 48#include "buffer_head_io.h"
49 49
@@ -308,8 +308,8 @@ static int ocfs2_validate_group_descriptor(struct super_block *sb,
308 int rc; 308 int rc;
309 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; 309 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
310 310
311 mlog(0, "Validating group descriptor %llu\n", 311 trace_ocfs2_validate_group_descriptor(
312 (unsigned long long)bh->b_blocknr); 312 (unsigned long long)bh->b_blocknr);
313 313
314 BUG_ON(!buffer_uptodate(bh)); 314 BUG_ON(!buffer_uptodate(bh));
315 315
@@ -389,8 +389,6 @@ static int ocfs2_block_group_fill(handle_t *handle,
389 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; 389 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
390 struct super_block * sb = alloc_inode->i_sb; 390 struct super_block * sb = alloc_inode->i_sb;
391 391
392 mlog_entry_void();
393
394 if (((unsigned long long) bg_bh->b_blocknr) != group_blkno) { 392 if (((unsigned long long) bg_bh->b_blocknr) != group_blkno) {
395 ocfs2_error(alloc_inode->i_sb, "group block (%llu) != " 393 ocfs2_error(alloc_inode->i_sb, "group block (%llu) != "
396 "b_blocknr (%llu)", 394 "b_blocknr (%llu)",
@@ -436,7 +434,8 @@ static int ocfs2_block_group_fill(handle_t *handle,
436 * allocation time. */ 434 * allocation time. */
437 435
438bail: 436bail:
439 mlog_exit(status); 437 if (status)
438 mlog_errno(status);
440 return status; 439 return status;
441} 440}
442 441
@@ -477,8 +476,8 @@ ocfs2_block_group_alloc_contig(struct ocfs2_super *osb, handle_t *handle,
477 476
478 /* setup the group */ 477 /* setup the group */
479 bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off); 478 bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
480 mlog(0, "new descriptor, record %u, at block %llu\n", 479 trace_ocfs2_block_group_alloc_contig(
481 alloc_rec, (unsigned long long)bg_blkno); 480 (unsigned long long)bg_blkno, alloc_rec);
482 481
483 bg_bh = sb_getblk(osb->sb, bg_blkno); 482 bg_bh = sb_getblk(osb->sb, bg_blkno);
484 if (!bg_bh) { 483 if (!bg_bh) {
@@ -657,8 +656,8 @@ ocfs2_block_group_alloc_discontig(handle_t *handle,
657 656
658 /* setup the group */ 657 /* setup the group */
659 bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off); 658 bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
660 mlog(0, "new descriptor, record %u, at block %llu\n", 659 trace_ocfs2_block_group_alloc_discontig(
661 alloc_rec, (unsigned long long)bg_blkno); 660 (unsigned long long)bg_blkno, alloc_rec);
662 661
663 bg_bh = sb_getblk(osb->sb, bg_blkno); 662 bg_bh = sb_getblk(osb->sb, bg_blkno);
664 if (!bg_bh) { 663 if (!bg_bh) {
@@ -707,8 +706,6 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
707 706
708 BUG_ON(ocfs2_is_cluster_bitmap(alloc_inode)); 707 BUG_ON(ocfs2_is_cluster_bitmap(alloc_inode));
709 708
710 mlog_entry_void();
711
712 cl = &fe->id2.i_chain; 709 cl = &fe->id2.i_chain;
713 status = ocfs2_reserve_clusters_with_limit(osb, 710 status = ocfs2_reserve_clusters_with_limit(osb,
714 le16_to_cpu(cl->cl_cpg), 711 le16_to_cpu(cl->cl_cpg),
@@ -730,8 +727,8 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
730 } 727 }
731 728
732 if (last_alloc_group && *last_alloc_group != 0) { 729 if (last_alloc_group && *last_alloc_group != 0) {
733 mlog(0, "use old allocation group %llu for block group alloc\n", 730 trace_ocfs2_block_group_alloc(
734 (unsigned long long)*last_alloc_group); 731 (unsigned long long)*last_alloc_group);
735 ac->ac_last_group = *last_alloc_group; 732 ac->ac_last_group = *last_alloc_group;
736 } 733 }
737 734
@@ -796,7 +793,8 @@ bail:
796 793
797 brelse(bg_bh); 794 brelse(bg_bh);
798 795
799 mlog_exit(status); 796 if (status)
797 mlog_errno(status);
800 return status; 798 return status;
801} 799}
802 800
@@ -814,8 +812,6 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
814 struct ocfs2_dinode *fe; 812 struct ocfs2_dinode *fe;
815 u32 free_bits; 813 u32 free_bits;
816 814
817 mlog_entry_void();
818
819 alloc_inode = ocfs2_get_system_file_inode(osb, type, slot); 815 alloc_inode = ocfs2_get_system_file_inode(osb, type, slot);
820 if (!alloc_inode) { 816 if (!alloc_inode) {
821 mlog_errno(-EINVAL); 817 mlog_errno(-EINVAL);
@@ -855,16 +851,15 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
855 if (bits_wanted > free_bits) { 851 if (bits_wanted > free_bits) {
856 /* cluster bitmap never grows */ 852 /* cluster bitmap never grows */
857 if (ocfs2_is_cluster_bitmap(alloc_inode)) { 853 if (ocfs2_is_cluster_bitmap(alloc_inode)) {
858 mlog(0, "Disk Full: wanted=%u, free_bits=%u\n", 854 trace_ocfs2_reserve_suballoc_bits_nospc(bits_wanted,
859 bits_wanted, free_bits); 855 free_bits);
860 status = -ENOSPC; 856 status = -ENOSPC;
861 goto bail; 857 goto bail;
862 } 858 }
863 859
864 if (!(flags & ALLOC_NEW_GROUP)) { 860 if (!(flags & ALLOC_NEW_GROUP)) {
865 mlog(0, "Alloc File %u Full: wanted=%u, free_bits=%u, " 861 trace_ocfs2_reserve_suballoc_bits_no_new_group(
866 "and we don't alloc a new group for it.\n", 862 slot, bits_wanted, free_bits);
867 slot, bits_wanted, free_bits);
868 status = -ENOSPC; 863 status = -ENOSPC;
869 goto bail; 864 goto bail;
870 } 865 }
@@ -890,7 +885,8 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
890bail: 885bail:
891 brelse(bh); 886 brelse(bh);
892 887
893 mlog_exit(status); 888 if (status)
889 mlog_errno(status);
894 return status; 890 return status;
895} 891}
896 892
@@ -1052,7 +1048,8 @@ bail:
1052 *ac = NULL; 1048 *ac = NULL;
1053 } 1049 }
1054 1050
1055 mlog_exit(status); 1051 if (status)
1052 mlog_errno(status);
1056 return status; 1053 return status;
1057} 1054}
1058 1055
@@ -1119,8 +1116,8 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
1119 spin_lock(&osb->osb_lock); 1116 spin_lock(&osb->osb_lock);
1120 osb->osb_inode_alloc_group = alloc_group; 1117 osb->osb_inode_alloc_group = alloc_group;
1121 spin_unlock(&osb->osb_lock); 1118 spin_unlock(&osb->osb_lock);
1122 mlog(0, "after reservation, new allocation group is " 1119 trace_ocfs2_reserve_new_inode_new_group(
1123 "%llu\n", (unsigned long long)alloc_group); 1120 (unsigned long long)alloc_group);
1124 1121
1125 /* 1122 /*
1126 * Some inodes must be freed by us, so try to allocate 1123 * Some inodes must be freed by us, so try to allocate
@@ -1152,7 +1149,8 @@ bail:
1152 *ac = NULL; 1149 *ac = NULL;
1153 } 1150 }
1154 1151
1155 mlog_exit(status); 1152 if (status)
1153 mlog_errno(status);
1156 return status; 1154 return status;
1157} 1155}
1158 1156
@@ -1189,8 +1187,6 @@ static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
1189{ 1187{
1190 int status; 1188 int status;
1191 1189
1192 mlog_entry_void();
1193
1194 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); 1190 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
1195 if (!(*ac)) { 1191 if (!(*ac)) {
1196 status = -ENOMEM; 1192 status = -ENOMEM;
@@ -1229,7 +1225,8 @@ bail:
1229 *ac = NULL; 1225 *ac = NULL;
1230 } 1226 }
1231 1227
1232 mlog_exit(status); 1228 if (status)
1229 mlog_errno(status);
1233 return status; 1230 return status;
1234} 1231}
1235 1232
@@ -1357,15 +1354,12 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
1357 void *bitmap = bg->bg_bitmap; 1354 void *bitmap = bg->bg_bitmap;
1358 int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; 1355 int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
1359 1356
1360 mlog_entry_void();
1361
1362 /* All callers get the descriptor via 1357 /* All callers get the descriptor via
1363 * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ 1358 * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
1364 BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); 1359 BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
1365 BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits); 1360 BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits);
1366 1361
1367 mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off, 1362 trace_ocfs2_block_group_set_bits(bit_off, num_bits);
1368 num_bits);
1369 1363
1370 if (ocfs2_is_cluster_bitmap(alloc_inode)) 1364 if (ocfs2_is_cluster_bitmap(alloc_inode))
1371 journal_type = OCFS2_JOURNAL_ACCESS_UNDO; 1365 journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
@@ -1394,7 +1388,8 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
1394 ocfs2_journal_dirty(handle, group_bh); 1388 ocfs2_journal_dirty(handle, group_bh);
1395 1389
1396bail: 1390bail:
1397 mlog_exit(status); 1391 if (status)
1392 mlog_errno(status);
1398 return status; 1393 return status;
1399} 1394}
1400 1395
@@ -1437,10 +1432,10 @@ static int ocfs2_relink_block_group(handle_t *handle,
1437 BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); 1432 BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
1438 BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(prev_bg)); 1433 BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(prev_bg));
1439 1434
1440 mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n", 1435 trace_ocfs2_relink_block_group(
1441 (unsigned long long)le64_to_cpu(fe->i_blkno), chain, 1436 (unsigned long long)le64_to_cpu(fe->i_blkno), chain,
1442 (unsigned long long)le64_to_cpu(bg->bg_blkno), 1437 (unsigned long long)le64_to_cpu(bg->bg_blkno),
1443 (unsigned long long)le64_to_cpu(prev_bg->bg_blkno)); 1438 (unsigned long long)le64_to_cpu(prev_bg->bg_blkno));
1444 1439
1445 fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno); 1440 fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno);
1446 bg_ptr = le64_to_cpu(bg->bg_next_group); 1441 bg_ptr = le64_to_cpu(bg->bg_next_group);
@@ -1484,7 +1479,8 @@ out_rollback:
1484 prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr); 1479 prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr);
1485 } 1480 }
1486 1481
1487 mlog_exit(status); 1482 if (status)
1483 mlog_errno(status);
1488 return status; 1484 return status;
1489} 1485}
1490 1486
@@ -1515,7 +1511,7 @@ static int ocfs2_cluster_group_search(struct inode *inode,
1515 max_bits = le16_to_cpu(gd->bg_bits); 1511 max_bits = le16_to_cpu(gd->bg_bits);
1516 1512
1517 /* Tail groups in cluster bitmaps which aren't cpg 1513 /* Tail groups in cluster bitmaps which aren't cpg
1518 * aligned are prone to partial extention by a failed 1514 * aligned are prone to partial extension by a failed
1519 * fs resize. If the file system resize never got to 1515 * fs resize. If the file system resize never got to
1520 * update the dinode cluster count, then we don't want 1516 * update the dinode cluster count, then we don't want
1521 * to trust any clusters past it, regardless of what 1517 * to trust any clusters past it, regardless of what
@@ -1525,10 +1521,10 @@ static int ocfs2_cluster_group_search(struct inode *inode,
1525 if ((gd_cluster_off + max_bits) > 1521 if ((gd_cluster_off + max_bits) >
1526 OCFS2_I(inode)->ip_clusters) { 1522 OCFS2_I(inode)->ip_clusters) {
1527 max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off; 1523 max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off;
1528 mlog(0, "Desc %llu, bg_bits %u, clusters %u, use %u\n", 1524 trace_ocfs2_cluster_group_search_wrong_max_bits(
1529 (unsigned long long)le64_to_cpu(gd->bg_blkno), 1525 (unsigned long long)le64_to_cpu(gd->bg_blkno),
1530 le16_to_cpu(gd->bg_bits), 1526 le16_to_cpu(gd->bg_bits),
1531 OCFS2_I(inode)->ip_clusters, max_bits); 1527 OCFS2_I(inode)->ip_clusters, max_bits);
1532 } 1528 }
1533 1529
1534 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), 1530 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
@@ -1542,9 +1538,9 @@ static int ocfs2_cluster_group_search(struct inode *inode,
1542 gd_cluster_off + 1538 gd_cluster_off +
1543 res->sr_bit_offset + 1539 res->sr_bit_offset +
1544 res->sr_bits); 1540 res->sr_bits);
1545 mlog(0, "Checking %llu against %llu\n", 1541 trace_ocfs2_cluster_group_search_max_block(
1546 (unsigned long long)blkoff, 1542 (unsigned long long)blkoff,
1547 (unsigned long long)max_block); 1543 (unsigned long long)max_block);
1548 if (blkoff > max_block) 1544 if (blkoff > max_block)
1549 return -ENOSPC; 1545 return -ENOSPC;
1550 } 1546 }
@@ -1588,9 +1584,9 @@ static int ocfs2_block_group_search(struct inode *inode,
1588 if (!ret && max_block) { 1584 if (!ret && max_block) {
1589 blkoff = le64_to_cpu(bg->bg_blkno) + 1585 blkoff = le64_to_cpu(bg->bg_blkno) +
1590 res->sr_bit_offset + res->sr_bits; 1586 res->sr_bit_offset + res->sr_bits;
1591 mlog(0, "Checking %llu against %llu\n", 1587 trace_ocfs2_block_group_search_max_block(
1592 (unsigned long long)blkoff, 1588 (unsigned long long)blkoff,
1593 (unsigned long long)max_block); 1589 (unsigned long long)max_block);
1594 if (blkoff > max_block) 1590 if (blkoff > max_block)
1595 ret = -ENOSPC; 1591 ret = -ENOSPC;
1596 } 1592 }
@@ -1756,9 +1752,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1756 struct ocfs2_group_desc *bg; 1752 struct ocfs2_group_desc *bg;
1757 1753
1758 chain = ac->ac_chain; 1754 chain = ac->ac_chain;
1759 mlog(0, "trying to alloc %u bits from chain %u, inode %llu\n", 1755 trace_ocfs2_search_chain_begin(
1760 bits_wanted, chain, 1756 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno,
1761 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno); 1757 bits_wanted, chain);
1762 1758
1763 status = ocfs2_read_group_descriptor(alloc_inode, fe, 1759 status = ocfs2_read_group_descriptor(alloc_inode, fe,
1764 le64_to_cpu(cl->cl_recs[chain].c_blkno), 1760 le64_to_cpu(cl->cl_recs[chain].c_blkno),
@@ -1799,8 +1795,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1799 goto bail; 1795 goto bail;
1800 } 1796 }
1801 1797
1802 mlog(0, "alloc succeeds: we give %u bits from block group %llu\n", 1798 trace_ocfs2_search_chain_succ(
1803 res->sr_bits, (unsigned long long)le64_to_cpu(bg->bg_blkno)); 1799 (unsigned long long)le64_to_cpu(bg->bg_blkno), res->sr_bits);
1804 1800
1805 res->sr_bg_blkno = le64_to_cpu(bg->bg_blkno); 1801 res->sr_bg_blkno = le64_to_cpu(bg->bg_blkno);
1806 1802
@@ -1861,8 +1857,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1861 goto bail; 1857 goto bail;
1862 } 1858 }
1863 1859
1864 mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits, 1860 trace_ocfs2_search_chain_end(
1865 (unsigned long long)le64_to_cpu(fe->i_blkno)); 1861 (unsigned long long)le64_to_cpu(fe->i_blkno),
1862 res->sr_bits);
1866 1863
1867out_loc_only: 1864out_loc_only:
1868 *bits_left = le16_to_cpu(bg->bg_free_bits_count); 1865 *bits_left = le16_to_cpu(bg->bg_free_bits_count);
@@ -1870,7 +1867,8 @@ bail:
1870 brelse(group_bh); 1867 brelse(group_bh);
1871 brelse(prev_group_bh); 1868 brelse(prev_group_bh);
1872 1869
1873 mlog_exit(status); 1870 if (status)
1871 mlog_errno(status);
1874 return status; 1872 return status;
1875} 1873}
1876 1874
@@ -1888,8 +1886,6 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
1888 struct ocfs2_chain_list *cl; 1886 struct ocfs2_chain_list *cl;
1889 struct ocfs2_dinode *fe; 1887 struct ocfs2_dinode *fe;
1890 1888
1891 mlog_entry_void();
1892
1893 BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); 1889 BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
1894 BUG_ON(bits_wanted > (ac->ac_bits_wanted - ac->ac_bits_given)); 1890 BUG_ON(bits_wanted > (ac->ac_bits_wanted - ac->ac_bits_given));
1895 BUG_ON(!ac->ac_bh); 1891 BUG_ON(!ac->ac_bh);
@@ -1945,8 +1941,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
1945 goto bail; 1941 goto bail;
1946 } 1942 }
1947 1943
1948 mlog(0, "Search of victim chain %u came up with nothing, " 1944 trace_ocfs2_claim_suballoc_bits(victim);
1949 "trying all chains now.\n", victim);
1950 1945
1951 /* If we didn't pick a good victim, then just default to 1946 /* If we didn't pick a good victim, then just default to
1952 * searching each chain in order. Don't allow chain relinking 1947 * searching each chain in order. Don't allow chain relinking
@@ -1984,7 +1979,8 @@ set_hint:
1984 } 1979 }
1985 1980
1986bail: 1981bail:
1987 mlog_exit(status); 1982 if (status)
1983 mlog_errno(status);
1988 return status; 1984 return status;
1989} 1985}
1990 1986
@@ -2021,7 +2017,8 @@ int ocfs2_claim_metadata(handle_t *handle,
2021 *num_bits = res.sr_bits; 2017 *num_bits = res.sr_bits;
2022 status = 0; 2018 status = 0;
2023bail: 2019bail:
2024 mlog_exit(status); 2020 if (status)
2021 mlog_errno(status);
2025 return status; 2022 return status;
2026} 2023}
2027 2024
@@ -2172,8 +2169,8 @@ int ocfs2_claim_new_inode_at_loc(handle_t *handle,
2172 goto out; 2169 goto out;
2173 } 2170 }
2174 2171
2175 mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits, 2172 trace_ocfs2_claim_new_inode_at_loc((unsigned long long)di_blkno,
2176 (unsigned long long)di_blkno); 2173 res->sr_bits);
2177 2174
2178 atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs); 2175 atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs);
2179 2176
@@ -2201,8 +2198,6 @@ int ocfs2_claim_new_inode(handle_t *handle,
2201 int status; 2198 int status;
2202 struct ocfs2_suballoc_result res; 2199 struct ocfs2_suballoc_result res;
2203 2200
2204 mlog_entry_void();
2205
2206 BUG_ON(!ac); 2201 BUG_ON(!ac);
2207 BUG_ON(ac->ac_bits_given != 0); 2202 BUG_ON(ac->ac_bits_given != 0);
2208 BUG_ON(ac->ac_bits_wanted != 1); 2203 BUG_ON(ac->ac_bits_wanted != 1);
@@ -2230,7 +2225,8 @@ int ocfs2_claim_new_inode(handle_t *handle,
2230 ocfs2_save_inode_ac_group(dir, ac); 2225 ocfs2_save_inode_ac_group(dir, ac);
2231 status = 0; 2226 status = 0;
2232bail: 2227bail:
2233 mlog_exit(status); 2228 if (status)
2229 mlog_errno(status);
2234 return status; 2230 return status;
2235} 2231}
2236 2232
@@ -2307,8 +2303,6 @@ int __ocfs2_claim_clusters(handle_t *handle,
2307 struct ocfs2_suballoc_result res = { .sr_blkno = 0, }; 2303 struct ocfs2_suballoc_result res = { .sr_blkno = 0, };
2308 struct ocfs2_super *osb = OCFS2_SB(ac->ac_inode->i_sb); 2304 struct ocfs2_super *osb = OCFS2_SB(ac->ac_inode->i_sb);
2309 2305
2310 mlog_entry_void();
2311
2312 BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); 2306 BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
2313 2307
2314 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL 2308 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
@@ -2363,7 +2357,8 @@ int __ocfs2_claim_clusters(handle_t *handle,
2363 ac->ac_bits_given += *num_clusters; 2357 ac->ac_bits_given += *num_clusters;
2364 2358
2365bail: 2359bail:
2366 mlog_exit(status); 2360 if (status)
2361 mlog_errno(status);
2367 return status; 2362 return status;
2368} 2363}
2369 2364
@@ -2392,13 +2387,11 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
2392 unsigned int tmp; 2387 unsigned int tmp;
2393 struct ocfs2_group_desc *undo_bg = NULL; 2388 struct ocfs2_group_desc *undo_bg = NULL;
2394 2389
2395 mlog_entry_void();
2396
2397 /* The caller got this descriptor from 2390 /* The caller got this descriptor from
2398 * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ 2391 * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
2399 BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); 2392 BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
2400 2393
2401 mlog(0, "off = %u, num = %u\n", bit_off, num_bits); 2394 trace_ocfs2_block_group_clear_bits(bit_off, num_bits);
2402 2395
2403 BUG_ON(undo_fn && !ocfs2_is_cluster_bitmap(alloc_inode)); 2396 BUG_ON(undo_fn && !ocfs2_is_cluster_bitmap(alloc_inode));
2404 status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), 2397 status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
@@ -2463,19 +2456,18 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle,
2463 struct buffer_head *group_bh = NULL; 2456 struct buffer_head *group_bh = NULL;
2464 struct ocfs2_group_desc *group; 2457 struct ocfs2_group_desc *group;
2465 2458
2466 mlog_entry_void();
2467
2468 /* The alloc_bh comes from ocfs2_free_dinode() or 2459 /* The alloc_bh comes from ocfs2_free_dinode() or
2469 * ocfs2_free_clusters(). The callers have all locked the 2460 * ocfs2_free_clusters(). The callers have all locked the
2470 * allocator and gotten alloc_bh from the lock call. This 2461 * allocator and gotten alloc_bh from the lock call. This
2471 * validates the dinode buffer. Any corruption that has happended 2462 * validates the dinode buffer. Any corruption that has happened
2472 * is a code bug. */ 2463 * is a code bug. */
2473 BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); 2464 BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
2474 BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl)); 2465 BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl));
2475 2466
2476 mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n", 2467 trace_ocfs2_free_suballoc_bits(
2477 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count, 2468 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno,
2478 (unsigned long long)bg_blkno, start_bit); 2469 (unsigned long long)bg_blkno,
2470 start_bit, count);
2479 2471
2480 status = ocfs2_read_group_descriptor(alloc_inode, fe, bg_blkno, 2472 status = ocfs2_read_group_descriptor(alloc_inode, fe, bg_blkno,
2481 &group_bh); 2473 &group_bh);
@@ -2511,7 +2503,8 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle,
2511bail: 2503bail:
2512 brelse(group_bh); 2504 brelse(group_bh);
2513 2505
2514 mlog_exit(status); 2506 if (status)
2507 mlog_errno(status);
2515 return status; 2508 return status;
2516} 2509}
2517 2510
@@ -2556,11 +2549,8 @@ static int _ocfs2_free_clusters(handle_t *handle,
2556 2549
2557 /* You can't ever have a contiguous set of clusters 2550 /* You can't ever have a contiguous set of clusters
2558 * bigger than a block group bitmap so we never have to worry 2551 * bigger than a block group bitmap so we never have to worry
2559 * about looping on them. */ 2552 * about looping on them.
2560 2553 * This is expensive. We can safely remove once this stuff has
2561 mlog_entry_void();
2562
2563 /* This is expensive. We can safely remove once this stuff has
2564 * gotten tested really well. */ 2554 * gotten tested really well. */
2565 BUG_ON(start_blk != ocfs2_clusters_to_blocks(bitmap_inode->i_sb, ocfs2_blocks_to_clusters(bitmap_inode->i_sb, start_blk))); 2555 BUG_ON(start_blk != ocfs2_clusters_to_blocks(bitmap_inode->i_sb, ocfs2_blocks_to_clusters(bitmap_inode->i_sb, start_blk)));
2566 2556
@@ -2569,10 +2559,9 @@ static int _ocfs2_free_clusters(handle_t *handle,
2569 ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno, 2559 ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno,
2570 &bg_start_bit); 2560 &bg_start_bit);
2571 2561
2572 mlog(0, "want to free %u clusters starting at block %llu\n", 2562 trace_ocfs2_free_clusters((unsigned long long)bg_blkno,
2573 num_clusters, (unsigned long long)start_blk); 2563 (unsigned long long)start_blk,
2574 mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n", 2564 bg_start_bit, num_clusters);
2575 (unsigned long long)bg_blkno, bg_start_bit);
2576 2565
2577 status = _ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, 2566 status = _ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
2578 bg_start_bit, bg_blkno, 2567 bg_start_bit, bg_blkno,
@@ -2586,7 +2575,8 @@ static int _ocfs2_free_clusters(handle_t *handle,
2586 num_clusters); 2575 num_clusters);
2587 2576
2588out: 2577out:
2589 mlog_exit(status); 2578 if (status)
2579 mlog_errno(status);
2590 return status; 2580 return status;
2591} 2581}
2592 2582
@@ -2756,7 +2746,7 @@ static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno,
2756 struct buffer_head *inode_bh = NULL; 2746 struct buffer_head *inode_bh = NULL;
2757 struct ocfs2_dinode *inode_fe; 2747 struct ocfs2_dinode *inode_fe;
2758 2748
2759 mlog_entry("blkno: %llu\n", (unsigned long long)blkno); 2749 trace_ocfs2_get_suballoc_slot_bit((unsigned long long)blkno);
2760 2750
2761 /* dirty read disk */ 2751 /* dirty read disk */
2762 status = ocfs2_read_blocks_sync(osb, blkno, 1, &inode_bh); 2752 status = ocfs2_read_blocks_sync(osb, blkno, 1, &inode_bh);
@@ -2793,7 +2783,8 @@ static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno,
2793bail: 2783bail:
2794 brelse(inode_bh); 2784 brelse(inode_bh);
2795 2785
2796 mlog_exit(status); 2786 if (status)
2787 mlog_errno(status);
2797 return status; 2788 return status;
2798} 2789}
2799 2790
@@ -2816,8 +2807,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
2816 u64 bg_blkno; 2807 u64 bg_blkno;
2817 int status; 2808 int status;
2818 2809
2819 mlog_entry("blkno: %llu bit: %u\n", (unsigned long long)blkno, 2810 trace_ocfs2_test_suballoc_bit((unsigned long long)blkno,
2820 (unsigned int)bit); 2811 (unsigned int)bit);
2821 2812
2822 alloc_di = (struct ocfs2_dinode *)alloc_bh->b_data; 2813 alloc_di = (struct ocfs2_dinode *)alloc_bh->b_data;
2823 if ((bit + 1) > ocfs2_bits_per_group(&alloc_di->id2.i_chain)) { 2814 if ((bit + 1) > ocfs2_bits_per_group(&alloc_di->id2.i_chain)) {
@@ -2844,7 +2835,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
2844bail: 2835bail:
2845 brelse(group_bh); 2836 brelse(group_bh);
2846 2837
2847 mlog_exit(status); 2838 if (status)
2839 mlog_errno(status);
2848 return status; 2840 return status;
2849} 2841}
2850 2842
@@ -2869,7 +2861,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
2869 struct inode *inode_alloc_inode; 2861 struct inode *inode_alloc_inode;
2870 struct buffer_head *alloc_bh = NULL; 2862 struct buffer_head *alloc_bh = NULL;
2871 2863
2872 mlog_entry("blkno: %llu", (unsigned long long)blkno); 2864 trace_ocfs2_test_inode_bit((unsigned long long)blkno);
2873 2865
2874 status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot, 2866 status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot,
2875 &group_blkno, &suballoc_bit); 2867 &group_blkno, &suballoc_bit);
@@ -2910,6 +2902,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
2910 iput(inode_alloc_inode); 2902 iput(inode_alloc_inode);
2911 brelse(alloc_bh); 2903 brelse(alloc_bh);
2912bail: 2904bail:
2913 mlog_exit(status); 2905 if (status)
2906 mlog_errno(status);
2914 return status; 2907 return status;
2915} 2908}
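
suballoc.c converts dozens of similarly shaped prints, typically a block number plus a bit count. When many events share one layout, trace headers conventionally declare a single event class and stamp out instances from it. A hedged sketch with hypothetical class and field names (the real ocfs2_trace.h classes and argument orders may differ):

	/* Hypothetical sketch of class reuse; names and field order are assumptions. */
	DECLARE_EVENT_CLASS(ocfs2__ull_uint,
		TP_PROTO(unsigned long long blkno, unsigned int bits),
		TP_ARGS(blkno, bits),
		TP_STRUCT__entry(
			__field(unsigned long long, blkno)
			__field(unsigned int, bits)
		),
		TP_fast_assign(
			__entry->blkno = blkno;
			__entry->bits = bits;
		),
		TP_printk("%llu %u", __entry->blkno, __entry->bits)
	);

	/* Each DEFINE_EVENT() then yields one trace_...() call with no extra code. */
	DEFINE_EVENT(ocfs2__ull_uint, ocfs2_search_chain_succ,
		TP_PROTO(unsigned long long blkno, unsigned int bits),
		TP_ARGS(bits == bits ? blkno : blkno, bits)
	);

Note the class-based form keeps the per-event kernel text and data footprint small, which matters when a conversion like this one adds this many events at once.
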
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 236ed1bdca2c..5a521c748859 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -42,7 +42,9 @@
42#include <linux/seq_file.h> 42#include <linux/seq_file.h>
43#include <linux/quotaops.h> 43#include <linux/quotaops.h>
44 44
45#define MLOG_MASK_PREFIX ML_SUPER 45#define CREATE_TRACE_POINTS
46#include "ocfs2_trace.h"
47
46#include <cluster/masklog.h> 48#include <cluster/masklog.h>
47 49
48#include "ocfs2.h" 50#include "ocfs2.h"
@@ -76,7 +78,7 @@ static struct kmem_cache *ocfs2_inode_cachep = NULL;
76struct kmem_cache *ocfs2_dquot_cachep; 78struct kmem_cache *ocfs2_dquot_cachep;
77struct kmem_cache *ocfs2_qf_chunk_cachep; 79struct kmem_cache *ocfs2_qf_chunk_cachep;
78 80
79/* OCFS2 needs to schedule several differnt types of work which 81/* OCFS2 needs to schedule several different types of work which
80 * require cluster locking, disk I/O, recovery waits, etc. Since these 82 * require cluster locking, disk I/O, recovery waits, etc. Since these
81 * types of work tend to be heavy we avoid using the kernel events 83 * types of work tend to be heavy we avoid using the kernel events
82 * workqueue and schedule on our own. */ 84 * workqueue and schedule on our own. */
@@ -441,8 +443,6 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
441 int status = 0; 443 int status = 0;
442 int i; 444 int i;
443 445
444 mlog_entry_void();
445
446 new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE, 0); 446 new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE, 0);
447 if (IS_ERR(new)) { 447 if (IS_ERR(new)) {
448 status = PTR_ERR(new); 448 status = PTR_ERR(new);
@@ -478,7 +478,8 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
478 } 478 }
479 479
480bail: 480bail:
481 mlog_exit(status); 481 if (status)
482 mlog_errno(status);
482 return status; 483 return status;
483} 484}
484 485
@@ -488,8 +489,6 @@ static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb)
488 int status = 0; 489 int status = 0;
489 int i; 490 int i;
490 491
491 mlog_entry_void();
492
493 for (i = OCFS2_LAST_GLOBAL_SYSTEM_INODE + 1; 492 for (i = OCFS2_LAST_GLOBAL_SYSTEM_INODE + 1;
494 i < NUM_SYSTEM_INODES; 493 i < NUM_SYSTEM_INODES;
495 i++) { 494 i++) {
@@ -508,7 +507,8 @@ static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb)
508 } 507 }
509 508
510bail: 509bail:
511 mlog_exit(status); 510 if (status)
511 mlog_errno(status);
512 return status; 512 return status;
513} 513}
514 514
@@ -517,8 +517,6 @@ static void ocfs2_release_system_inodes(struct ocfs2_super *osb)
517 int i; 517 int i;
518 struct inode *inode; 518 struct inode *inode;
519 519
520 mlog_entry_void();
521
522 for (i = 0; i < NUM_GLOBAL_SYSTEM_INODES; i++) { 520 for (i = 0; i < NUM_GLOBAL_SYSTEM_INODES; i++) {
523 inode = osb->global_system_inodes[i]; 521 inode = osb->global_system_inodes[i];
524 if (inode) { 522 if (inode) {
@@ -540,7 +538,7 @@ static void ocfs2_release_system_inodes(struct ocfs2_super *osb)
540 } 538 }
541 539
542 if (!osb->local_system_inodes) 540 if (!osb->local_system_inodes)
543 goto out; 541 return;
544 542
545 for (i = 0; i < NUM_LOCAL_SYSTEM_INODES * osb->max_slots; i++) { 543 for (i = 0; i < NUM_LOCAL_SYSTEM_INODES * osb->max_slots; i++) {
546 if (osb->local_system_inodes[i]) { 544 if (osb->local_system_inodes[i]) {
@@ -551,9 +549,6 @@ static void ocfs2_release_system_inodes(struct ocfs2_super *osb)
551 549
552 kfree(osb->local_system_inodes); 550 kfree(osb->local_system_inodes);
553 osb->local_system_inodes = NULL; 551 osb->local_system_inodes = NULL;
554
555out:
556 mlog_exit(0);
557} 552}
558 553
559/* We're allocating fs objects, use GFP_NOFS */ 554/* We're allocating fs objects, use GFP_NOFS */
@@ -684,12 +679,9 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
684 } 679 }
685 680
686 if (*flags & MS_RDONLY) { 681 if (*flags & MS_RDONLY) {
687 mlog(0, "Going to ro mode.\n");
688 sb->s_flags |= MS_RDONLY; 682 sb->s_flags |= MS_RDONLY;
689 osb->osb_flags |= OCFS2_OSB_SOFT_RO; 683 osb->osb_flags |= OCFS2_OSB_SOFT_RO;
690 } else { 684 } else {
691 mlog(0, "Making ro filesystem writeable.\n");
692
693 if (osb->osb_flags & OCFS2_OSB_ERROR_FS) { 685 if (osb->osb_flags & OCFS2_OSB_ERROR_FS) {
694 mlog(ML_ERROR, "Cannot remount RDWR " 686 mlog(ML_ERROR, "Cannot remount RDWR "
695 "filesystem due to previous errors.\n"); 687 "filesystem due to previous errors.\n");
@@ -707,6 +699,7 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
707 sb->s_flags &= ~MS_RDONLY; 699 sb->s_flags &= ~MS_RDONLY;
708 osb->osb_flags &= ~OCFS2_OSB_SOFT_RO; 700 osb->osb_flags &= ~OCFS2_OSB_SOFT_RO;
709 } 701 }
702 trace_ocfs2_remount(sb->s_flags, osb->osb_flags, *flags);
710unlock_osb: 703unlock_osb:
711 spin_unlock(&osb->osb_lock); 704 spin_unlock(&osb->osb_lock);
712 /* Enable quota accounting after remounting RW */ 705 /* Enable quota accounting after remounting RW */
@@ -1032,7 +1025,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1032 char nodestr[8]; 1025 char nodestr[8];
1033 struct ocfs2_blockcheck_stats stats; 1026 struct ocfs2_blockcheck_stats stats;
1034 1027
1035 mlog_entry("%p, %p, %i", sb, data, silent); 1028 trace_ocfs2_fill_super(sb, data, silent);
1036 1029
1037 if (!ocfs2_parse_options(sb, data, &parsed_options, 0)) { 1030 if (!ocfs2_parse_options(sb, data, &parsed_options, 0)) {
1038 status = -EINVAL; 1031 status = -EINVAL;
@@ -1208,7 +1201,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1208 mlog_errno(status); 1201 mlog_errno(status);
1209 atomic_set(&osb->vol_state, VOLUME_DISABLED); 1202 atomic_set(&osb->vol_state, VOLUME_DISABLED);
1210 wake_up(&osb->osb_mount_event); 1203 wake_up(&osb->osb_mount_event);
1211 mlog_exit(status);
1212 return status; 1204 return status;
1213 } 1205 }
1214 } 1206 }
@@ -1222,7 +1214,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1222 /* Start this when the mount is almost sure of being successful */ 1214 /* Start this when the mount is almost sure of being successful */
1223 ocfs2_orphan_scan_start(osb); 1215 ocfs2_orphan_scan_start(osb);
1224 1216
1225 mlog_exit(status);
1226 return status; 1217 return status;
1227 1218
1228read_super_error: 1219read_super_error:
@@ -1237,7 +1228,8 @@ read_super_error:
1237 ocfs2_dismount_volume(sb, 1); 1228 ocfs2_dismount_volume(sb, 1);
1238 } 1229 }
1239 1230
1240 mlog_exit(status); 1231 if (status)
1232 mlog_errno(status);
1241 return status; 1233 return status;
1242} 1234}
1243 1235
@@ -1320,8 +1312,7 @@ static int ocfs2_parse_options(struct super_block *sb,
1320 char *p; 1312 char *p;
1321 u32 tmp; 1313 u32 tmp;
1322 1314
1323 mlog_entry("remount: %d, options: \"%s\"\n", is_remount, 1315 trace_ocfs2_parse_options(is_remount, options ? options : "(none)");
1324 options ? options : "(none)");
1325 1316
1326 mopt->commit_interval = 0; 1317 mopt->commit_interval = 0;
1327 mopt->mount_opt = OCFS2_MOUNT_NOINTR; 1318 mopt->mount_opt = OCFS2_MOUNT_NOINTR;
@@ -1538,7 +1529,6 @@ static int ocfs2_parse_options(struct super_block *sb,
1538 status = 1; 1529 status = 1;
1539 1530
1540bail: 1531bail:
1541 mlog_exit(status);
1542 return status; 1532 return status;
1543} 1533}
1544 1534
@@ -1629,8 +1619,6 @@ static int __init ocfs2_init(void)
1629{ 1619{
1630 int status; 1620 int status;
1631 1621
1632 mlog_entry_void();
1633
1634 ocfs2_print_version(); 1622 ocfs2_print_version();
1635 1623
1636 status = init_ocfs2_uptodate_cache(); 1624 status = init_ocfs2_uptodate_cache();
@@ -1664,10 +1652,9 @@ leave:
1664 if (status < 0) { 1652 if (status < 0) {
1665 ocfs2_free_mem_caches(); 1653 ocfs2_free_mem_caches();
1666 exit_ocfs2_uptodate_cache(); 1654 exit_ocfs2_uptodate_cache();
1655 mlog_errno(status);
1667 } 1656 }
1668 1657
1669 mlog_exit(status);
1670
1671 if (status >= 0) { 1658 if (status >= 0) {
1672 return register_filesystem(&ocfs2_fs_type); 1659 return register_filesystem(&ocfs2_fs_type);
1673 } else 1660 } else
@@ -1676,8 +1663,6 @@ leave:
1676 1663
1677static void __exit ocfs2_exit(void) 1664static void __exit ocfs2_exit(void)
1678{ 1665{
1679 mlog_entry_void();
1680
1681 if (ocfs2_wq) { 1666 if (ocfs2_wq) {
1682 flush_workqueue(ocfs2_wq); 1667 flush_workqueue(ocfs2_wq);
1683 destroy_workqueue(ocfs2_wq); 1668 destroy_workqueue(ocfs2_wq);
@@ -1692,18 +1677,14 @@ static void __exit ocfs2_exit(void)
1692 unregister_filesystem(&ocfs2_fs_type); 1677 unregister_filesystem(&ocfs2_fs_type);
1693 1678
1694 exit_ocfs2_uptodate_cache(); 1679 exit_ocfs2_uptodate_cache();
1695
1696 mlog_exit_void();
1697} 1680}
1698 1681
1699static void ocfs2_put_super(struct super_block *sb) 1682static void ocfs2_put_super(struct super_block *sb)
1700{ 1683{
1701 mlog_entry("(0x%p)\n", sb); 1684 trace_ocfs2_put_super(sb);
1702 1685
1703 ocfs2_sync_blockdev(sb); 1686 ocfs2_sync_blockdev(sb);
1704 ocfs2_dismount_volume(sb, 0); 1687 ocfs2_dismount_volume(sb, 0);
1705
1706 mlog_exit_void();
1707} 1688}
1708 1689
1709static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf) 1690static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -1715,7 +1696,7 @@ static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
1715 struct buffer_head *bh = NULL; 1696 struct buffer_head *bh = NULL;
1716 struct inode *inode = NULL; 1697 struct inode *inode = NULL;
1717 1698
1718 mlog_entry("(%p, %p)\n", dentry->d_sb, buf); 1699 trace_ocfs2_statfs(dentry->d_sb, buf);
1719 1700
1720 osb = OCFS2_SB(dentry->d_sb); 1701 osb = OCFS2_SB(dentry->d_sb);
1721 1702
@@ -1762,7 +1743,8 @@ bail:
1762 if (inode) 1743 if (inode)
1763 iput(inode); 1744 iput(inode);
1764 1745
1765 mlog_exit(status); 1746 if (status)
1747 mlog_errno(status);
1766 1748
1767 return status; 1749 return status;
1768} 1750}
@@ -1882,8 +1864,6 @@ static int ocfs2_mount_volume(struct super_block *sb)
1882 int unlock_super = 0; 1864 int unlock_super = 0;
1883 struct ocfs2_super *osb = OCFS2_SB(sb); 1865 struct ocfs2_super *osb = OCFS2_SB(sb);
1884 1866
1885 mlog_entry_void();
1886
1887 if (ocfs2_is_hard_readonly(osb)) 1867 if (ocfs2_is_hard_readonly(osb))
1888 goto leave; 1868 goto leave;
1889 1869
@@ -1928,7 +1908,6 @@ leave:
1928 if (unlock_super) 1908 if (unlock_super)
1929 ocfs2_super_unlock(osb, 1); 1909 ocfs2_super_unlock(osb, 1);
1930 1910
1931 mlog_exit(status);
1932 return status; 1911 return status;
1933} 1912}
1934 1913
@@ -1938,7 +1917,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1938 struct ocfs2_super *osb = NULL; 1917 struct ocfs2_super *osb = NULL;
1939 char nodestr[8]; 1918 char nodestr[8];
1940 1919
1941 mlog_entry("(0x%p)\n", sb); 1920 trace_ocfs2_dismount_volume(sb);
1942 1921
1943 BUG_ON(!sb); 1922 BUG_ON(!sb);
1944 osb = OCFS2_SB(sb); 1923 osb = OCFS2_SB(sb);
@@ -2090,8 +2069,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
2090 struct ocfs2_super *osb; 2069 struct ocfs2_super *osb;
2091 u64 total_blocks; 2070 u64 total_blocks;
2092 2071
2093 mlog_entry_void();
2094
2095 osb = kzalloc(sizeof(struct ocfs2_super), GFP_KERNEL); 2072 osb = kzalloc(sizeof(struct ocfs2_super), GFP_KERNEL);
2096 if (!osb) { 2073 if (!osb) {
2097 status = -ENOMEM; 2074 status = -ENOMEM;
@@ -2155,7 +2132,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
2155 status = -EINVAL; 2132 status = -EINVAL;
2156 goto bail; 2133 goto bail;
2157 } 2134 }
2158 mlog(0, "max_slots for this device: %u\n", osb->max_slots);
2159 2135
2160 ocfs2_orphan_scan_init(osb); 2136 ocfs2_orphan_scan_init(osb);
2161 2137
@@ -2294,7 +2270,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
2294 osb->s_clustersize_bits = 2270 osb->s_clustersize_bits =
2295 le32_to_cpu(di->id2.i_super.s_clustersize_bits); 2271 le32_to_cpu(di->id2.i_super.s_clustersize_bits);
2296 osb->s_clustersize = 1 << osb->s_clustersize_bits; 2272 osb->s_clustersize = 1 << osb->s_clustersize_bits;
2297 mlog(0, "clusterbits=%d\n", osb->s_clustersize_bits);
2298 2273
2299 if (osb->s_clustersize < OCFS2_MIN_CLUSTERSIZE || 2274 if (osb->s_clustersize < OCFS2_MIN_CLUSTERSIZE ||
2300 osb->s_clustersize > OCFS2_MAX_CLUSTERSIZE) { 2275 osb->s_clustersize > OCFS2_MAX_CLUSTERSIZE) {
@@ -2333,11 +2308,10 @@ static int ocfs2_initialize_super(struct super_block *sb,
2333 le64_to_cpu(di->id2.i_super.s_first_cluster_group); 2308 le64_to_cpu(di->id2.i_super.s_first_cluster_group);
2334 osb->fs_generation = le32_to_cpu(di->i_fs_generation); 2309 osb->fs_generation = le32_to_cpu(di->i_fs_generation);
2335 osb->uuid_hash = le32_to_cpu(di->id2.i_super.s_uuid_hash); 2310 osb->uuid_hash = le32_to_cpu(di->id2.i_super.s_uuid_hash);
2336 mlog(0, "vol_label: %s\n", osb->vol_label); 2311 trace_ocfs2_initialize_super(osb->vol_label, osb->uuid_str,
2337 mlog(0, "uuid: %s\n", osb->uuid_str); 2312 (unsigned long long)osb->root_blkno,
2338 mlog(0, "root_blkno=%llu, system_dir_blkno=%llu\n", 2313 (unsigned long long)osb->system_dir_blkno,
2339 (unsigned long long)osb->root_blkno, 2314 osb->s_clustersize_bits);
2340 (unsigned long long)osb->system_dir_blkno);
2341 2315
2342 osb->osb_dlm_debug = ocfs2_new_dlm_debug(); 2316 osb->osb_dlm_debug = ocfs2_new_dlm_debug();
2343 if (!osb->osb_dlm_debug) { 2317 if (!osb->osb_dlm_debug) {
@@ -2380,7 +2354,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
2380 } 2354 }
2381 2355
2382bail: 2356bail:
2383 mlog_exit(status);
2384 return status; 2357 return status;
2385} 2358}
2386 2359
@@ -2396,8 +2369,6 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
2396{ 2369{
2397 int status = -EAGAIN; 2370 int status = -EAGAIN;
2398 2371
2399 mlog_entry_void();
2400
2401 if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE, 2372 if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE,
2402 strlen(OCFS2_SUPER_BLOCK_SIGNATURE)) == 0) { 2373 strlen(OCFS2_SUPER_BLOCK_SIGNATURE)) == 0) {
2403 /* We have to do a raw check of the feature here */ 2374 /* We have to do a raw check of the feature here */
@@ -2452,7 +2423,8 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
2452 } 2423 }
2453 2424
2454out: 2425out:
2455 mlog_exit(status); 2426 if (status && status != -EAGAIN)
2427 mlog_errno(status);
2456 return status; 2428 return status;
2457} 2429}
2458 2430
@@ -2465,8 +2437,6 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
2465 * recover 2437 * recover
2466 * ourselves. */ 2438 * ourselves. */
2467 2439
2468 mlog_entry_void();
2469
2470 /* Init our journal object. */ 2440 /* Init our journal object. */
2471 status = ocfs2_journal_init(osb->journal, &dirty); 2441 status = ocfs2_journal_init(osb->journal, &dirty);
2472 if (status < 0) { 2442 if (status < 0) {
@@ -2516,8 +2486,6 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
2516 * ourselves as mounted. */ 2486 * ourselves as mounted. */
2517 } 2487 }
2518 2488
2519 mlog(0, "Journal loaded.\n");
2520
2521 status = ocfs2_load_local_alloc(osb); 2489 status = ocfs2_load_local_alloc(osb);
2522 if (status < 0) { 2490 if (status < 0) {
2523 mlog_errno(status); 2491 mlog_errno(status);
@@ -2549,7 +2517,8 @@ finally:
2549 if (local_alloc) 2517 if (local_alloc)
2550 kfree(local_alloc); 2518 kfree(local_alloc);
2551 2519
2552 mlog_exit(status); 2520 if (status)
2521 mlog_errno(status);
2553 return status; 2522 return status;
2554} 2523}
2555 2524
@@ -2561,8 +2530,6 @@ finally:
2561 */ 2530 */
2562static void ocfs2_delete_osb(struct ocfs2_super *osb) 2531static void ocfs2_delete_osb(struct ocfs2_super *osb)
2563{ 2532{
2564 mlog_entry_void();
2565
2566 /* This function assumes that the caller has the main osb resource */ 2533 /* This function assumes that the caller has the main osb resource */
2567 2534
2568 ocfs2_free_slot_info(osb); 2535 ocfs2_free_slot_info(osb);
@@ -2580,8 +2547,6 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
2580 kfree(osb->uuid_str); 2547 kfree(osb->uuid_str);
2581 ocfs2_put_dlm_debug(osb->osb_dlm_debug); 2548 ocfs2_put_dlm_debug(osb->osb_dlm_debug);
2582 memset(osb, 0, sizeof(struct ocfs2_super)); 2549 memset(osb, 0, sizeof(struct ocfs2_super));
2583
2584 mlog_exit_void();
2585} 2550}
2586 2551
2587/* Put OCFS2 into a readonly state, or (if the user specifies it), 2552/* Put OCFS2 into a readonly state, or (if the user specifies it),
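
super.c is the one translation unit that defines CREATE_TRACE_POINTS before including ocfs2_trace.h (see the hunk at line 45 above). That macro makes the include expand to the event implementations rather than just the declarations, and it must appear exactly once per trace header across the whole module. A minimal sketch of the include-guard dance such a header relies on, assuming the conventional <trace/define_trace.h> layout; the details are assumptions, not the actual file:

	/* Sketch of the expected ocfs2_trace.h skeleton. */
	#undef TRACE_SYSTEM
	#define TRACE_SYSTEM ocfs2

	#if !defined(_TRACE_OCFS2_H) || defined(TRACE_HEADER_MULTI_READ)
	#define _TRACE_OCFS2_H

	#include <linux/tracepoint.h>

	/* TRACE_EVENT()/DEFINE_EVENT() definitions go here. */

	#endif /* _TRACE_OCFS2_H */

	/* This part must stay outside the include guard. */
	#define TRACE_INCLUDE_PATH .
	#define TRACE_INCLUDE_FILE ocfs2_trace
	#include <trace/define_trace.h>

Every other ocfs2 file, as the hunks above show, includes ocfs2_trace.h plainly and gets only the trace_ocfs2_*() call declarations.
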
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index 9975457c981f..5d22872e2bb3 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -40,7 +40,6 @@
40#include <linux/pagemap.h> 40#include <linux/pagemap.h>
41#include <linux/namei.h> 41#include <linux/namei.h>
42 42
43#define MLOG_MASK_PREFIX ML_NAMEI
44#include <cluster/masklog.h> 43#include <cluster/masklog.h>
45 44
46#include "ocfs2.h" 45#include "ocfs2.h"
@@ -62,8 +61,6 @@ static char *ocfs2_fast_symlink_getlink(struct inode *inode,
62 char *link = NULL; 61 char *link = NULL;
63 struct ocfs2_dinode *fe; 62 struct ocfs2_dinode *fe;
64 63
65 mlog_entry_void();
66
67 status = ocfs2_read_inode_block(inode, bh); 64 status = ocfs2_read_inode_block(inode, bh);
68 if (status < 0) { 65 if (status < 0) {
69 mlog_errno(status); 66 mlog_errno(status);
@@ -74,7 +71,6 @@ static char *ocfs2_fast_symlink_getlink(struct inode *inode,
74 fe = (struct ocfs2_dinode *) (*bh)->b_data; 71 fe = (struct ocfs2_dinode *) (*bh)->b_data;
75 link = (char *) fe->id2.i_symlink; 72 link = (char *) fe->id2.i_symlink;
76bail: 73bail:
77 mlog_exit(status);
78 74
79 return link; 75 return link;
80} 76}
@@ -88,8 +84,6 @@ static int ocfs2_readlink(struct dentry *dentry,
88 struct buffer_head *bh = NULL; 84 struct buffer_head *bh = NULL;
89 struct inode *inode = dentry->d_inode; 85 struct inode *inode = dentry->d_inode;
90 86
91 mlog_entry_void();
92
93 link = ocfs2_fast_symlink_getlink(inode, &bh); 87 link = ocfs2_fast_symlink_getlink(inode, &bh);
94 if (IS_ERR(link)) { 88 if (IS_ERR(link)) {
95 ret = PTR_ERR(link); 89 ret = PTR_ERR(link);
@@ -104,7 +98,8 @@ static int ocfs2_readlink(struct dentry *dentry,
104 98
105 brelse(bh); 99 brelse(bh);
106out: 100out:
107 mlog_exit(ret); 101 if (ret < 0)
102 mlog_errno(ret);
108 return ret; 103 return ret;
109} 104}
110 105
@@ -117,8 +112,6 @@ static void *ocfs2_fast_follow_link(struct dentry *dentry,
117 struct inode *inode = dentry->d_inode; 112 struct inode *inode = dentry->d_inode;
118 struct buffer_head *bh = NULL; 113 struct buffer_head *bh = NULL;
119 114
120 mlog_entry_void();
121
122 BUG_ON(!ocfs2_inode_is_fast_symlink(inode)); 115 BUG_ON(!ocfs2_inode_is_fast_symlink(inode));
123 target = ocfs2_fast_symlink_getlink(inode, &bh); 116 target = ocfs2_fast_symlink_getlink(inode, &bh);
124 if (IS_ERR(target)) { 117 if (IS_ERR(target)) {
@@ -142,7 +135,8 @@ bail:
142 nd_set_link(nd, status ? ERR_PTR(status) : link); 135 nd_set_link(nd, status ? ERR_PTR(status) : link);
143 brelse(bh); 136 brelse(bh);
144 137
145 mlog_exit(status); 138 if (status)
139 mlog_errno(status);
146 return NULL; 140 return NULL;
147} 141}
148 142
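
Alongside the tracepoint conversion, the patch retires the unconditional mlog_exit()/mlog_exit_void() exit logging in favor of a conditional mlog_errno(), so successful returns no longer generate log traffic. The recurring shape, shown as a small before/after sketch drawn from the hunks above:

	/* Before: every return was logged, success or failure. */
	bail:
		mlog_exit(status);
		return status;

	/* After: only failures are reported. Note the carve-out in
	 * ocfs2_verify_volume(), where an expected -EAGAIN is deliberately
	 * excluded because it is not an error on that path. */
	bail:
		if (status)
			mlog_errno(status);
		return status;
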
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index 902efb23b6a6..3d635f4bbb20 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -27,7 +27,6 @@
27#include <linux/types.h> 27#include <linux/types.h>
28#include <linux/highmem.h> 28#include <linux/highmem.h>
29 29
30#define MLOG_MASK_PREFIX ML_INODE
31#include <cluster/masklog.h> 30#include <cluster/masklog.h>
32 31
33#include "ocfs2.h" 32#include "ocfs2.h"
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index a0a120e82b97..52eaf33d346f 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -54,14 +54,13 @@
54#include <linux/buffer_head.h> 54#include <linux/buffer_head.h>
55#include <linux/rbtree.h> 55#include <linux/rbtree.h>
56 56
57#define MLOG_MASK_PREFIX ML_UPTODATE
58
59#include <cluster/masklog.h> 57#include <cluster/masklog.h>
60 58
61#include "ocfs2.h" 59#include "ocfs2.h"
62 60
63#include "inode.h" 61#include "inode.h"
64#include "uptodate.h" 62#include "uptodate.h"
63#include "ocfs2_trace.h"
65 64
66struct ocfs2_meta_cache_item { 65struct ocfs2_meta_cache_item {
67 struct rb_node c_node; 66 struct rb_node c_node;
@@ -152,8 +151,8 @@ static unsigned int ocfs2_purge_copied_metadata_tree(struct rb_root *root)
152 while ((node = rb_last(root)) != NULL) { 151 while ((node = rb_last(root)) != NULL) {
153 item = rb_entry(node, struct ocfs2_meta_cache_item, c_node); 152 item = rb_entry(node, struct ocfs2_meta_cache_item, c_node);
154 153
155 mlog(0, "Purge item %llu\n", 154 trace_ocfs2_purge_copied_metadata_tree(
156 (unsigned long long) item->c_block); 155 (unsigned long long) item->c_block);
157 156
158 rb_erase(&item->c_node, root); 157 rb_erase(&item->c_node, root);
159 kmem_cache_free(ocfs2_uptodate_cachep, item); 158 kmem_cache_free(ocfs2_uptodate_cachep, item);
@@ -180,9 +179,9 @@ void ocfs2_metadata_cache_purge(struct ocfs2_caching_info *ci)
180 tree = !(ci->ci_flags & OCFS2_CACHE_FL_INLINE); 179 tree = !(ci->ci_flags & OCFS2_CACHE_FL_INLINE);
181 to_purge = ci->ci_num_cached; 180 to_purge = ci->ci_num_cached;
182 181
183 mlog(0, "Purge %u %s items from Owner %llu\n", to_purge, 182 trace_ocfs2_metadata_cache_purge(
184 tree ? "array" : "tree", 183 (unsigned long long)ocfs2_metadata_cache_owner(ci),
185 (unsigned long long)ocfs2_metadata_cache_owner(ci)); 184 to_purge, tree);
186 185
187 /* If we're a tree, save off the root so that we can safely 186 /* If we're a tree, save off the root so that we can safely
188 * initialize the cache. We do the work to free tree members 187 * initialize the cache. We do the work to free tree members
@@ -249,10 +248,10 @@ static int ocfs2_buffer_cached(struct ocfs2_caching_info *ci,
249 248
250 ocfs2_metadata_cache_lock(ci); 249 ocfs2_metadata_cache_lock(ci);
251 250
252 mlog(0, "Owner %llu, query block %llu (inline = %u)\n", 251 trace_ocfs2_buffer_cached_begin(
253 (unsigned long long)ocfs2_metadata_cache_owner(ci), 252 (unsigned long long)ocfs2_metadata_cache_owner(ci),
254 (unsigned long long) bh->b_blocknr, 253 (unsigned long long) bh->b_blocknr,
255 !!(ci->ci_flags & OCFS2_CACHE_FL_INLINE)); 254 !!(ci->ci_flags & OCFS2_CACHE_FL_INLINE));
256 255
257 if (ci->ci_flags & OCFS2_CACHE_FL_INLINE) 256 if (ci->ci_flags & OCFS2_CACHE_FL_INLINE)
258 index = ocfs2_search_cache_array(ci, bh->b_blocknr); 257 index = ocfs2_search_cache_array(ci, bh->b_blocknr);
@@ -261,7 +260,7 @@ static int ocfs2_buffer_cached(struct ocfs2_caching_info *ci,
261 260
262 ocfs2_metadata_cache_unlock(ci); 261 ocfs2_metadata_cache_unlock(ci);
263 262
264 mlog(0, "index = %d, item = %p\n", index, item); 263 trace_ocfs2_buffer_cached_end(index, item);
265 264
266 return (index != -1) || (item != NULL); 265 return (index != -1) || (item != NULL);
267} 266}
@@ -306,8 +305,9 @@ static void ocfs2_append_cache_array(struct ocfs2_caching_info *ci,
306{ 305{
307 BUG_ON(ci->ci_num_cached >= OCFS2_CACHE_INFO_MAX_ARRAY); 306 BUG_ON(ci->ci_num_cached >= OCFS2_CACHE_INFO_MAX_ARRAY);
308 307
309 mlog(0, "block %llu takes position %u\n", (unsigned long long) block, 308 trace_ocfs2_append_cache_array(
310 ci->ci_num_cached); 309 (unsigned long long)ocfs2_metadata_cache_owner(ci),
310 (unsigned long long)block, ci->ci_num_cached);
311 311
312 ci->ci_cache.ci_array[ci->ci_num_cached] = block; 312 ci->ci_cache.ci_array[ci->ci_num_cached] = block;
313 ci->ci_num_cached++; 313 ci->ci_num_cached++;
@@ -324,8 +324,9 @@ static void __ocfs2_insert_cache_tree(struct ocfs2_caching_info *ci,
324 struct rb_node **p = &ci->ci_cache.ci_tree.rb_node; 324 struct rb_node **p = &ci->ci_cache.ci_tree.rb_node;
325 struct ocfs2_meta_cache_item *tmp; 325 struct ocfs2_meta_cache_item *tmp;
326 326
327 mlog(0, "Insert block %llu num = %u\n", (unsigned long long) block, 327 trace_ocfs2_insert_cache_tree(
328 ci->ci_num_cached); 328 (unsigned long long)ocfs2_metadata_cache_owner(ci),
329 (unsigned long long)block, ci->ci_num_cached);
329 330
330 while(*p) { 331 while(*p) {
331 parent = *p; 332 parent = *p;
@@ -389,9 +390,9 @@ static void ocfs2_expand_cache(struct ocfs2_caching_info *ci,
389 tree[i] = NULL; 390 tree[i] = NULL;
390 } 391 }
391 392
392 mlog(0, "Expanded %llu to a tree cache: flags 0x%x, num = %u\n", 393 trace_ocfs2_expand_cache(
393 (unsigned long long)ocfs2_metadata_cache_owner(ci), 394 (unsigned long long)ocfs2_metadata_cache_owner(ci),
394 ci->ci_flags, ci->ci_num_cached); 395 ci->ci_flags, ci->ci_num_cached);
395} 396}
396 397
397/* Slow path function - memory allocation is necessary. See the 398/* Slow path function - memory allocation is necessary. See the
@@ -405,9 +406,9 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_caching_info *ci,
405 struct ocfs2_meta_cache_item *tree[OCFS2_CACHE_INFO_MAX_ARRAY] = 406 struct ocfs2_meta_cache_item *tree[OCFS2_CACHE_INFO_MAX_ARRAY] =
406 { NULL, }; 407 { NULL, };
407 408
408 mlog(0, "Owner %llu, block %llu, expand = %d\n", 409 trace_ocfs2_set_buffer_uptodate(
409 (unsigned long long)ocfs2_metadata_cache_owner(ci), 410 (unsigned long long)ocfs2_metadata_cache_owner(ci),
410 (unsigned long long)block, expand_tree); 411 (unsigned long long)block, expand_tree);
411 412
412 new = kmem_cache_alloc(ocfs2_uptodate_cachep, GFP_NOFS); 413 new = kmem_cache_alloc(ocfs2_uptodate_cachep, GFP_NOFS);
413 if (!new) { 414 if (!new) {
@@ -433,7 +434,6 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_caching_info *ci,
433 434
434 ocfs2_metadata_cache_lock(ci); 435 ocfs2_metadata_cache_lock(ci);
435 if (ocfs2_insert_can_use_array(ci)) { 436 if (ocfs2_insert_can_use_array(ci)) {
436 mlog(0, "Someone cleared the tree underneath us\n");
437 /* Ok, items were removed from the cache in between 437 /* Ok, items were removed from the cache in between
438 * locks. Detect this and revert back to the fast path */ 438 * locks. Detect this and revert back to the fast path */
439 ocfs2_append_cache_array(ci, block); 439 ocfs2_append_cache_array(ci, block);
@@ -490,9 +490,9 @@ void ocfs2_set_buffer_uptodate(struct ocfs2_caching_info *ci,
490 if (ocfs2_buffer_cached(ci, bh)) 490 if (ocfs2_buffer_cached(ci, bh))
491 return; 491 return;
492 492
493 mlog(0, "Owner %llu, inserting block %llu\n", 493 trace_ocfs2_set_buffer_uptodate_begin(
494 (unsigned long long)ocfs2_metadata_cache_owner(ci), 494 (unsigned long long)ocfs2_metadata_cache_owner(ci),
495 (unsigned long long)bh->b_blocknr); 495 (unsigned long long)bh->b_blocknr);
496 496
497 /* No need to recheck under spinlock - insertion is guarded by 497 /* No need to recheck under spinlock - insertion is guarded by
498 * co_io_lock() */ 498 * co_io_lock() */
@@ -542,8 +542,9 @@ static void ocfs2_remove_metadata_array(struct ocfs2_caching_info *ci,
542 BUG_ON(index >= ci->ci_num_cached); 542 BUG_ON(index >= ci->ci_num_cached);
543 BUG_ON(!ci->ci_num_cached); 543 BUG_ON(!ci->ci_num_cached);
544 544
545 mlog(0, "remove index %d (num_cached = %u\n", index, 545 trace_ocfs2_remove_metadata_array(
546 ci->ci_num_cached); 546 (unsigned long long)ocfs2_metadata_cache_owner(ci),
547 index, ci->ci_num_cached);
547 548
548 ci->ci_num_cached--; 549 ci->ci_num_cached--;
549 550
@@ -559,8 +560,9 @@ static void ocfs2_remove_metadata_array(struct ocfs2_caching_info *ci,
559static void ocfs2_remove_metadata_tree(struct ocfs2_caching_info *ci, 560static void ocfs2_remove_metadata_tree(struct ocfs2_caching_info *ci,
560 struct ocfs2_meta_cache_item *item) 561 struct ocfs2_meta_cache_item *item)
561{ 562{
562 mlog(0, "remove block %llu from tree\n", 563 trace_ocfs2_remove_metadata_tree(
563 (unsigned long long) item->c_block); 564 (unsigned long long)ocfs2_metadata_cache_owner(ci),
565 (unsigned long long)item->c_block);
564 566
565 rb_erase(&item->c_node, &ci->ci_cache.ci_tree); 567 rb_erase(&item->c_node, &ci->ci_cache.ci_tree);
566 ci->ci_num_cached--; 568 ci->ci_num_cached--;
@@ -573,10 +575,10 @@ static void ocfs2_remove_block_from_cache(struct ocfs2_caching_info *ci,
573 struct ocfs2_meta_cache_item *item = NULL; 575 struct ocfs2_meta_cache_item *item = NULL;
574 576
575 ocfs2_metadata_cache_lock(ci); 577 ocfs2_metadata_cache_lock(ci);
576 mlog(0, "Owner %llu, remove %llu, items = %u, array = %u\n", 578 trace_ocfs2_remove_block_from_cache(
577 (unsigned long long)ocfs2_metadata_cache_owner(ci), 579 (unsigned long long)ocfs2_metadata_cache_owner(ci),
578 (unsigned long long) block, ci->ci_num_cached, 580 (unsigned long long) block, ci->ci_num_cached,
579 ci->ci_flags & OCFS2_CACHE_FL_INLINE); 581 ci->ci_flags);
580 582
581 if (ci->ci_flags & OCFS2_CACHE_FL_INLINE) { 583 if (ci->ci_flags & OCFS2_CACHE_FL_INLINE) {
582 index = ocfs2_search_cache_array(ci, block); 584 index = ocfs2_search_cache_array(ci, block);
@@ -626,9 +628,6 @@ int __init init_ocfs2_uptodate_cache(void)
626 if (!ocfs2_uptodate_cachep) 628 if (!ocfs2_uptodate_cachep)
627 return -ENOMEM; 629 return -ENOMEM;
628 630
629 mlog(0, "%u inlined cache items per inode.\n",
630 OCFS2_CACHE_INFO_MAX_ARRAY);
631
632 return 0; 631 return 0;
633} 632}
634 633
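
The uptodate.c hunks do more than swap the logging mechanism: events such as ocfs2_append_cache_array and ocfs2_remove_metadata_tree gain the cache owner as a first argument, so each record can be correlated back to an inode. A hedged sketch of a three-field event matching those (owner, block, count) call sites; the event name follows from the trace_<name>() convention, but the real header may route it through a shared class:

	/* Hypothetical standalone form of an event that ocfs2_trace.h
	 * may actually define via DECLARE_EVENT_CLASS()/DEFINE_EVENT(). */
	TRACE_EVENT(ocfs2_append_cache_array,
		TP_PROTO(unsigned long long owner, unsigned long long block,
			 unsigned int count),
		TP_ARGS(owner, block, count),
		TP_STRUCT__entry(
			__field(unsigned long long, owner)
			__field(unsigned long long, block)
			__field(unsigned int, count)
		),
		TP_fast_assign(
			__entry->owner = owner;
			__entry->block = block;
			__entry->count = count;
		),
		TP_printk("owner %llu block %llu count %u",
			  __entry->owner, __entry->block, __entry->count)
	);
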
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 6bb602486c6b..81ecf9c0bf0a 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -37,7 +37,6 @@
37#include <linux/string.h> 37#include <linux/string.h>
38#include <linux/security.h> 38#include <linux/security.h>
39 39
40#define MLOG_MASK_PREFIX ML_XATTR
41#include <cluster/masklog.h> 40#include <cluster/masklog.h>
42 41
43#include "ocfs2.h" 42#include "ocfs2.h"
@@ -57,6 +56,7 @@
57#include "xattr.h" 56#include "xattr.h"
58#include "refcounttree.h" 57#include "refcounttree.h"
59#include "acl.h" 58#include "acl.h"
59#include "ocfs2_trace.h"
60 60
61struct ocfs2_xattr_def_value_root { 61struct ocfs2_xattr_def_value_root {
62 struct ocfs2_xattr_value_root xv; 62 struct ocfs2_xattr_value_root xv;
@@ -474,8 +474,7 @@ static int ocfs2_validate_xattr_block(struct super_block *sb,
474 struct ocfs2_xattr_block *xb = 474 struct ocfs2_xattr_block *xb =
475 (struct ocfs2_xattr_block *)bh->b_data; 475 (struct ocfs2_xattr_block *)bh->b_data;
476 476
477 mlog(0, "Validating xattr block %llu\n", 477 trace_ocfs2_validate_xattr_block((unsigned long long)bh->b_blocknr);
478 (unsigned long long)bh->b_blocknr);
479 478
480 BUG_ON(!buffer_uptodate(bh)); 479 BUG_ON(!buffer_uptodate(bh));
481 480
@@ -715,11 +714,11 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
715 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters); 714 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
716 struct ocfs2_extent_tree et; 715 struct ocfs2_extent_tree et;
717 716
718 mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
719
720 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 717 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
721 718
722 while (clusters_to_add) { 719 while (clusters_to_add) {
720 trace_ocfs2_xattr_extend_allocation(clusters_to_add);
721
723 status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 722 status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
724 OCFS2_JOURNAL_ACCESS_WRITE); 723 OCFS2_JOURNAL_ACCESS_WRITE);
725 if (status < 0) { 724 if (status < 0) {
@@ -754,8 +753,6 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
754 */ 753 */
755 BUG_ON(why == RESTART_META); 754 BUG_ON(why == RESTART_META);
756 755
757 mlog(0, "restarting xattr value extension for %u"
758 " clusters,.\n", clusters_to_add);
759 credits = ocfs2_calc_extend_credits(inode->i_sb, 756 credits = ocfs2_calc_extend_credits(inode->i_sb,
760 &vb->vb_xv->xr_list, 757 &vb->vb_xv->xr_list,
761 clusters_to_add); 758 clusters_to_add);
@@ -3246,8 +3243,8 @@ static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
3246 } 3243 }
3247 3244
3248 meta_add += extra_meta; 3245 meta_add += extra_meta;
3249 mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, " 3246 trace_ocfs2_init_xattr_set_ctxt(xi->xi_name, meta_add,
3250 "credits = %d\n", xi->xi_name, meta_add, clusters_add, *credits); 3247 clusters_add, *credits);
3251 3248
3252 if (meta_add) { 3249 if (meta_add) {
3253 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, 3250 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
@@ -3557,7 +3554,7 @@ int ocfs2_xattr_set(struct inode *inode,
3557 down_write(&OCFS2_I(inode)->ip_xattr_sem); 3554 down_write(&OCFS2_I(inode)->ip_xattr_sem);
3558 /* 3555 /*
3559 * Scan inode and external block to find the same name 3556 * Scan inode and external block to find the same name
3560 * extended attribute and collect search infomation. 3557 * extended attribute and collect search information.
3561 */ 3558 */
3562 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); 3559 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3563 if (ret) 3560 if (ret)
@@ -3581,7 +3578,7 @@ int ocfs2_xattr_set(struct inode *inode,
3581 goto cleanup; 3578 goto cleanup;
3582 } 3579 }
3583 3580
3584 /* Check whether the value is refcounted and do some prepartion. */ 3581 /* Check whether the value is refcounted and do some preparation. */
3585 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL && 3582 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL &&
3586 (!xis.not_found || !xbs.not_found)) { 3583 (!xis.not_found || !xbs.not_found)) {
3587 ret = ocfs2_prepare_refcount_xattr(inode, di, &xi, 3584 ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
@@ -3887,8 +3884,10 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
3887 3884
3888 if (found) { 3885 if (found) {
3889 xs->here = &xs->header->xh_entries[index]; 3886 xs->here = &xs->header->xh_entries[index];
3890 mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name, 3887 trace_ocfs2_xattr_bucket_find(OCFS2_I(inode)->ip_blkno,
3891 (unsigned long long)bucket_blkno(xs->bucket), index); 3888 name, name_index, name_hash,
3889 (unsigned long long)bucket_blkno(xs->bucket),
3890 index);
3892 } else 3891 } else
3893 ret = -ENODATA; 3892 ret = -ENODATA;
3894 3893
@@ -3915,8 +3914,10 @@ static int ocfs2_xattr_index_block_find(struct inode *inode,
3915 if (le16_to_cpu(el->l_next_free_rec) == 0) 3914 if (le16_to_cpu(el->l_next_free_rec) == 0)
3916 return -ENODATA; 3915 return -ENODATA;
3917 3916
3918 mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n", 3917 trace_ocfs2_xattr_index_block_find(OCFS2_I(inode)->ip_blkno,
3919 name, name_hash, name_index); 3918 name, name_index, name_hash,
3919 (unsigned long long)root_bh->b_blocknr,
3920 -1);
3920 3921
3921 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash, 3922 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
3922 &num_clusters, el); 3923 &num_clusters, el);
@@ -3927,9 +3928,10 @@ static int ocfs2_xattr_index_block_find(struct inode *inode,
3927 3928
3928 BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash); 3929 BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
3929 3930
3930 mlog(0, "find xattr extent rec %u clusters from %llu, the first hash " 3931 trace_ocfs2_xattr_index_block_find_rec(OCFS2_I(inode)->ip_blkno,
3931 "in the rec is %u\n", num_clusters, (unsigned long long)p_blkno, 3932 name, name_index, first_hash,
3932 first_hash); 3933 (unsigned long long)p_blkno,
3934 num_clusters);
3933 3935
3934 ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash, 3936 ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
3935 p_blkno, first_hash, num_clusters, xs); 3937 p_blkno, first_hash, num_clusters, xs);
@@ -3955,8 +3957,9 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode,
3955 return -ENOMEM; 3957 return -ENOMEM;
3956 } 3958 }
3957 3959
3958 mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n", 3960 trace_ocfs2_iterate_xattr_buckets(
3959 clusters, (unsigned long long)blkno); 3961 (unsigned long long)OCFS2_I(inode)->ip_blkno,
3962 (unsigned long long)blkno, clusters);
3960 3963
3961 for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) { 3964 for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
3962 ret = ocfs2_read_xattr_bucket(bucket, blkno); 3965 ret = ocfs2_read_xattr_bucket(bucket, blkno);
@@ -3972,8 +3975,7 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode,
3972 if (i == 0) 3975 if (i == 0)
3973 num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets); 3976 num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
3974 3977
3975 mlog(0, "iterating xattr bucket %llu, first hash %u\n", 3978 trace_ocfs2_iterate_xattr_bucket((unsigned long long)blkno,
3976 (unsigned long long)blkno,
3977 le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash)); 3979 le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
3978 if (func) { 3980 if (func) {
3979 ret = func(inode, bucket, para); 3981 ret = func(inode, bucket, para);
@@ -4173,9 +4175,9 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
4173 char *src = xb_bh->b_data; 4175 char *src = xb_bh->b_data;
4174 char *target = bucket_block(bucket, blks - 1); 4176 char *target = bucket_block(bucket, blks - 1);
4175 4177
4176 mlog(0, "cp xattr from block %llu to bucket %llu\n", 4178 trace_ocfs2_cp_xattr_block_to_bucket_begin(
4177 (unsigned long long)xb_bh->b_blocknr, 4179 (unsigned long long)xb_bh->b_blocknr,
4178 (unsigned long long)bucket_blkno(bucket)); 4180 (unsigned long long)bucket_blkno(bucket));
4179 4181
4180 for (i = 0; i < blks; i++) 4182 for (i = 0; i < blks; i++)
4181 memset(bucket_block(bucket, i), 0, blocksize); 4183 memset(bucket_block(bucket, i), 0, blocksize);
@@ -4211,8 +4213,7 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
4211 for (i = 0; i < count; i++) 4213 for (i = 0; i < count; i++)
4212 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change); 4214 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
4213 4215
4214 mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n", 4216 trace_ocfs2_cp_xattr_block_to_bucket_end(offset, size, off_change);
4215 offset, size, off_change);
4216 4217
4217 sort(target + offset, count, sizeof(struct ocfs2_xattr_entry), 4218 sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
4218 cmp_xe, swap_xe); 4219 cmp_xe, swap_xe);
@@ -4261,8 +4262,8 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
4261 struct ocfs2_xattr_tree_root *xr; 4262 struct ocfs2_xattr_tree_root *xr;
4262 u16 xb_flags = le16_to_cpu(xb->xb_flags); 4263 u16 xb_flags = le16_to_cpu(xb->xb_flags);
4263 4264
4264 mlog(0, "create xattr index block for %llu\n", 4265 trace_ocfs2_xattr_create_index_block_begin(
4265 (unsigned long long)xb_bh->b_blocknr); 4266 (unsigned long long)xb_bh->b_blocknr);
4266 4267
4267 BUG_ON(xb_flags & OCFS2_XATTR_INDEXED); 4268 BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
4268 BUG_ON(!xs->bucket); 4269 BUG_ON(!xs->bucket);
@@ -4295,8 +4296,7 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
4295 */ 4296 */
4296 blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off); 4297 blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
4297 4298
4298 mlog(0, "allocate 1 cluster from %llu to xattr block\n", 4299 trace_ocfs2_xattr_create_index_block((unsigned long long)blkno);
4299 (unsigned long long)blkno);
4300 4300
4301 ret = ocfs2_init_xattr_bucket(xs->bucket, blkno); 4301 ret = ocfs2_init_xattr_bucket(xs->bucket, blkno);
4302 if (ret) { 4302 if (ret) {
@@ -4400,8 +4400,7 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
4400 entries = (char *)xh->xh_entries; 4400 entries = (char *)xh->xh_entries;
4401 xh_free_start = le16_to_cpu(xh->xh_free_start); 4401 xh_free_start = le16_to_cpu(xh->xh_free_start);
4402 4402
4403 mlog(0, "adjust xattr bucket in %llu, count = %u, " 4403 trace_ocfs2_defrag_xattr_bucket(
4404 "xh_free_start = %u, xh_name_value_len = %u.\n",
4405 (unsigned long long)blkno, le16_to_cpu(xh->xh_count), 4404 (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
4406 xh_free_start, le16_to_cpu(xh->xh_name_value_len)); 4405 xh_free_start, le16_to_cpu(xh->xh_name_value_len));
4407 4406
@@ -4503,8 +4502,9 @@ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
4503 BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets); 4502 BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets);
4504 BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize); 4503 BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize);
4505 4504
4506 mlog(0, "move half of xattrs in cluster %llu to %llu\n", 4505 trace_ocfs2_mv_xattr_bucket_cross_cluster(
4507 (unsigned long long)last_cluster_blkno, (unsigned long long)new_blkno); 4506 (unsigned long long)last_cluster_blkno,
4507 (unsigned long long)new_blkno);
4508 4508
4509 ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first), 4509 ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first),
4510 last_cluster_blkno, new_blkno, 4510 last_cluster_blkno, new_blkno,
@@ -4614,8 +4614,8 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
4614 struct ocfs2_xattr_entry *xe; 4614 struct ocfs2_xattr_entry *xe;
4615 int blocksize = inode->i_sb->s_blocksize; 4615 int blocksize = inode->i_sb->s_blocksize;
4616 4616
4617 mlog(0, "move some of xattrs from bucket %llu to %llu\n", 4617 trace_ocfs2_divide_xattr_bucket_begin((unsigned long long)blk,
4618 (unsigned long long)blk, (unsigned long long)new_blk); 4618 (unsigned long long)new_blk);
4619 4619
4620 s_bucket = ocfs2_xattr_bucket_new(inode); 4620 s_bucket = ocfs2_xattr_bucket_new(inode);
4621 t_bucket = ocfs2_xattr_bucket_new(inode); 4621 t_bucket = ocfs2_xattr_bucket_new(inode);
@@ -4714,9 +4714,9 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
4714 */ 4714 */
4715 xe = &xh->xh_entries[start]; 4715 xe = &xh->xh_entries[start];
4716 len = sizeof(struct ocfs2_xattr_entry) * (count - start); 4716 len = sizeof(struct ocfs2_xattr_entry) * (count - start);
4717 mlog(0, "mv xattr entry len %d from %d to %d\n", len, 4717 trace_ocfs2_divide_xattr_bucket_move(len,
4718 (int)((char *)xe - (char *)xh), 4718 (int)((char *)xe - (char *)xh),
4719 (int)((char *)xh->xh_entries - (char *)xh)); 4719 (int)((char *)xh->xh_entries - (char *)xh));
4720 memmove((char *)xh->xh_entries, (char *)xe, len); 4720 memmove((char *)xh->xh_entries, (char *)xe, len);
4721 xe = &xh->xh_entries[count - start]; 4721 xe = &xh->xh_entries[count - start];
4722 len = sizeof(struct ocfs2_xattr_entry) * start; 4722 len = sizeof(struct ocfs2_xattr_entry) * start;
@@ -4788,9 +4788,9 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
4788 4788
4789 BUG_ON(s_blkno == t_blkno); 4789 BUG_ON(s_blkno == t_blkno);
4790 4790
4791 mlog(0, "cp bucket %llu to %llu, target is %d\n", 4791 trace_ocfs2_cp_xattr_bucket((unsigned long long)s_blkno,
4792 (unsigned long long)s_blkno, (unsigned long long)t_blkno, 4792 (unsigned long long)t_blkno,
4793 t_is_new); 4793 t_is_new);
4794 4794
4795 s_bucket = ocfs2_xattr_bucket_new(inode); 4795 s_bucket = ocfs2_xattr_bucket_new(inode);
4796 t_bucket = ocfs2_xattr_bucket_new(inode); 4796 t_bucket = ocfs2_xattr_bucket_new(inode);
@@ -4862,8 +4862,8 @@ static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
4862 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb); 4862 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
4863 struct ocfs2_xattr_bucket *old_first, *new_first; 4863 struct ocfs2_xattr_bucket *old_first, *new_first;
4864 4864
4865 mlog(0, "mv xattrs from cluster %llu to %llu\n", 4865 trace_ocfs2_mv_xattr_buckets((unsigned long long)last_blk,
4866 (unsigned long long)last_blk, (unsigned long long)to_blk); 4866 (unsigned long long)to_blk);
4867 4867
4868 BUG_ON(start_bucket >= num_buckets); 4868 BUG_ON(start_bucket >= num_buckets);
4869 if (start_bucket) { 4869 if (start_bucket) {
@@ -5013,9 +5013,9 @@ static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
5013{ 5013{
5014 int ret; 5014 int ret;
5015 5015
5016 mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n", 5016 trace_ocfs2_adjust_xattr_cross_cluster(
5017 (unsigned long long)bucket_blkno(first), prev_clusters, 5017 (unsigned long long)bucket_blkno(first),
5018 (unsigned long long)new_blk); 5018 (unsigned long long)new_blk, prev_clusters);
5019 5019
5020 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) { 5020 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) {
5021 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode, 5021 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
@@ -5088,10 +5088,10 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
5088 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5088 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5089 struct ocfs2_extent_tree et; 5089 struct ocfs2_extent_tree et;
5090 5090
5091 mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, " 5091 trace_ocfs2_add_new_xattr_cluster_begin(
5092 "previous xattr blkno = %llu\n", 5092 (unsigned long long)OCFS2_I(inode)->ip_blkno,
5093 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5093 (unsigned long long)bucket_blkno(first),
5094 prev_cpos, (unsigned long long)bucket_blkno(first)); 5094 prev_cpos, prev_clusters);
5095 5095
5096 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); 5096 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
5097 5097
@@ -5113,8 +5113,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
5113 BUG_ON(num_bits > clusters_to_add); 5113 BUG_ON(num_bits > clusters_to_add);
5114 5114
5115 block = ocfs2_clusters_to_blocks(osb->sb, bit_off); 5115 block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
5116 mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n", 5116 trace_ocfs2_add_new_xattr_cluster((unsigned long long)block, num_bits);
5117 num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
5118 5117
5119 if (bucket_blkno(first) + (prev_clusters * bpc) == block && 5118 if (bucket_blkno(first) + (prev_clusters * bpc) == block &&
5120 (prev_clusters + num_bits) << osb->s_clustersize_bits <= 5119 (prev_clusters + num_bits) << osb->s_clustersize_bits <=
@@ -5130,8 +5129,6 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
5130 */ 5129 */
5131 v_start = prev_cpos + prev_clusters; 5130 v_start = prev_cpos + prev_clusters;
5132 *num_clusters = prev_clusters + num_bits; 5131 *num_clusters = prev_clusters + num_bits;
5133 mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
5134 num_bits);
5135 } else { 5132 } else {
5136 ret = ocfs2_adjust_xattr_cross_cluster(inode, 5133 ret = ocfs2_adjust_xattr_cross_cluster(inode,
5137 handle, 5134 handle,
@@ -5147,8 +5144,8 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
5147 } 5144 }
5148 } 5145 }
5149 5146
5150 mlog(0, "Insert %u clusters at block %llu for xattr at %u\n", 5147 trace_ocfs2_add_new_xattr_cluster_insert((unsigned long long)block,
5151 num_bits, (unsigned long long)block, v_start); 5148 v_start, num_bits);
5152 ret = ocfs2_insert_extent(handle, &et, v_start, block, 5149 ret = ocfs2_insert_extent(handle, &et, v_start, block,
5153 num_bits, 0, ctxt->meta_ac); 5150 num_bits, 0, ctxt->meta_ac);
5154 if (ret < 0) { 5151 if (ret < 0) {
@@ -5183,9 +5180,9 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode,
5183 u64 end_blk; 5180 u64 end_blk;
5184 u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets); 5181 u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets);
5185 5182
5186 mlog(0, "extend xattr bucket in %llu, xattr extend rec starting " 5183 trace_ocfs2_extend_xattr_bucket((unsigned long long)target_blk,
5187 "from %llu, len = %u\n", (unsigned long long)target_blk, 5184 (unsigned long long)bucket_blkno(first),
5188 (unsigned long long)bucket_blkno(first), num_clusters); 5185 num_clusters, new_bucket);
5189 5186
5190 /* The extent must have room for an additional bucket */ 5187 /* The extent must have room for an additional bucket */
5191 BUG_ON(new_bucket >= 5188 BUG_ON(new_bucket >=
@@ -5265,8 +5262,8 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode,
5265 /* The bucket at the front of the extent */ 5262 /* The bucket at the front of the extent */
5266 struct ocfs2_xattr_bucket *first; 5263 struct ocfs2_xattr_bucket *first;
5267 5264
5268 mlog(0, "Add new xattr bucket starting from %llu\n", 5265 trace_ocfs2_add_new_xattr_bucket(
5269 (unsigned long long)bucket_blkno(target)); 5266 (unsigned long long)bucket_blkno(target));
5270 5267
5271 /* The first bucket of the original extent */ 5268 /* The first bucket of the original extent */
5272 first = ocfs2_xattr_bucket_new(inode); 5269 first = ocfs2_xattr_bucket_new(inode);
@@ -5382,8 +5379,8 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
5382 * modified something. We have to assume they did, and dirty 5379 * modified something. We have to assume they did, and dirty
5383 * the whole bucket. This leaves us in a consistent state. 5380 * the whole bucket. This leaves us in a consistent state.
5384 */ 5381 */
5385 mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n", 5382 trace_ocfs2_xattr_bucket_value_truncate(
5386 xe_off, (unsigned long long)bucket_blkno(bucket), len); 5383 (unsigned long long)bucket_blkno(bucket), xe_off, len);
5387 ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt); 5384 ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
5388 if (ret) { 5385 if (ret) {
5389 mlog_errno(ret); 5386 mlog_errno(ret);
@@ -5433,8 +5430,9 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
5433 5430
5434 ocfs2_init_dealloc_ctxt(&dealloc); 5431 ocfs2_init_dealloc_ctxt(&dealloc);
5435 5432
5436 mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n", 5433 trace_ocfs2_rm_xattr_cluster(
5437 cpos, len, (unsigned long long)blkno); 5434 (unsigned long long)OCFS2_I(inode)->ip_blkno,
5435 (unsigned long long)blkno, cpos, len);
5438 5436
5439 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno, 5437 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno,
5440 len); 5438 len);
@@ -5538,7 +5536,7 @@ static int ocfs2_xattr_set_entry_bucket(struct inode *inode,
5538 int ret; 5536 int ret;
5539 struct ocfs2_xa_loc loc; 5537 struct ocfs2_xa_loc loc;
5540 5538
5541 mlog_entry("Set xattr %s in xattr bucket\n", xi->xi_name); 5539 trace_ocfs2_xattr_set_entry_bucket(xi->xi_name);
5542 5540
5543 ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket, 5541 ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket,
5544 xs->not_found ? NULL : xs->here); 5542 xs->not_found ? NULL : xs->here);
@@ -5570,7 +5568,6 @@ static int ocfs2_xattr_set_entry_bucket(struct inode *inode,
5570 5568
5571 5569
5572out: 5570out:
5573 mlog_exit(ret);
5574 return ret; 5571 return ret;
5575} 5572}
5576 5573
@@ -5581,7 +5578,7 @@ static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
5581{ 5578{
5582 int ret; 5579 int ret;
5583 5580
5584 mlog_entry("Set xattr %s in xattr index block\n", xi->xi_name); 5581 trace_ocfs2_xattr_set_entry_index_block(xi->xi_name);
5585 5582
5586 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5583 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt);
5587 if (!ret) 5584 if (!ret)
@@ -5637,7 +5634,6 @@ static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
5637 mlog_errno(ret); 5634 mlog_errno(ret);
5638 5635
5639out: 5636out:
5640 mlog_exit(ret);
5641 return ret; 5637 return ret;
5642} 5638}
5643 5639
@@ -6041,9 +6037,9 @@ static int ocfs2_xattr_bucket_value_refcount(struct inode *inode,
6041 if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb))) 6037 if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)))
6042 p = &refcount; 6038 p = &refcount;
6043 6039
6044 mlog(0, "refcount bucket %llu, count = %u\n", 6040 trace_ocfs2_xattr_bucket_value_refcount(
6045 (unsigned long long)bucket_blkno(bucket), 6041 (unsigned long long)bucket_blkno(bucket),
6046 le16_to_cpu(xh->xh_count)); 6042 le16_to_cpu(xh->xh_count));
6047 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 6043 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6048 xe = &xh->xh_entries[i]; 6044 xe = &xh->xh_entries[i];
6049 6045
@@ -6339,8 +6335,8 @@ static int ocfs2_reflink_xattr_header(handle_t *handle,
6339 u32 clusters, cpos, p_cluster, num_clusters; 6335 u32 clusters, cpos, p_cluster, num_clusters;
6340 unsigned int ext_flags = 0; 6336 unsigned int ext_flags = 0;
6341 6337
6342 mlog(0, "reflink xattr in container %llu, count = %u\n", 6338 trace_ocfs2_reflink_xattr_header((unsigned long long)old_bh->b_blocknr,
6343 (unsigned long long)old_bh->b_blocknr, le16_to_cpu(xh->xh_count)); 6339 le16_to_cpu(xh->xh_count));
6344 6340
6345 last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)]; 6341 last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)];
6346 for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) { 6342 for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) {
@@ -6540,8 +6536,8 @@ static int ocfs2_create_empty_xattr_block(struct inode *inode,
6540 goto out; 6536 goto out;
6541 } 6537 }
6542 6538
6543 mlog(0, "create new xattr block for inode %llu, index = %d\n", 6539 trace_ocfs2_create_empty_xattr_block(
6544 (unsigned long long)fe_bh->b_blocknr, indexed); 6540 (unsigned long long)fe_bh->b_blocknr, indexed);
6545 ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed, 6541 ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed,
6546 ret_bh); 6542 ret_bh);
6547 if (ret) 6543 if (ret)
@@ -6952,8 +6948,8 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6952 if (ret) 6948 if (ret)
6953 mlog_errno(ret); 6949 mlog_errno(ret);
6954 6950
6955 mlog(0, "insert new xattr extent rec start %llu len %u to %u\n", 6951 trace_ocfs2_reflink_xattr_buckets((unsigned long long)new_blkno,
6956 (unsigned long long)new_blkno, num_clusters, reflink_cpos); 6952 num_clusters, reflink_cpos);
6957 6953
6958 len -= num_clusters; 6954 len -= num_clusters;
6959 blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters); 6955 blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
@@ -6982,8 +6978,7 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
6982 struct ocfs2_alloc_context *data_ac = NULL; 6978 struct ocfs2_alloc_context *data_ac = NULL;
6983 struct ocfs2_extent_tree et; 6979 struct ocfs2_extent_tree et;
6984 6980
6985 mlog(0, "reflink xattr buckets %llu len %u\n", 6981 trace_ocfs2_reflink_xattr_rec((unsigned long long)blkno, len);
6986 (unsigned long long)blkno, len);
6987 6982
6988 ocfs2_init_xattr_tree_extent_tree(&et, 6983 ocfs2_init_xattr_tree_extent_tree(&et,
6989 INODE_CACHE(args->reflink->new_inode), 6984 INODE_CACHE(args->reflink->new_inode),
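The ocfs2 hunks above all follow one pattern: a masked mlog(0, ...) debug call is replaced by a dedicated tracepoint whose name encodes the call site, with the format string moving into fs/ocfs2/ocfs2_trace.h. A minimal sketch of what one such declaration could look like follows; the real header may group related events with DECLARE_EVENT_CLASS, so the fields and print format here are assumptions, not a quote.

    /* Sketch of a tracepoint declaration for the conversion above; the
     * actual fs/ocfs2/ocfs2_trace.h may use an event class instead. */
    #undef TRACE_SYSTEM
    #define TRACE_SYSTEM ocfs2

    #include <linux/tracepoint.h>

    TRACE_EVENT(ocfs2_validate_xattr_block,
    	TP_PROTO(unsigned long long blkno),
    	TP_ARGS(blkno),
    	TP_STRUCT__entry(
    		__field(unsigned long long, blkno)
    	),
    	TP_fast_assign(
    		__entry->blkno = blkno;
    	),
    	TP_printk("%llu", __entry->blkno)
    );

Unlike mlog(), which formats its string whenever the mask allows it, a disabled tracepoint is a patched-out no-op, and each event can be toggled individually under /sys/kernel/debug/tracing/events/ocfs2/.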
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 8a6d34fa668a..d738a7e493dd 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -372,7 +372,6 @@ const struct address_space_operations omfs_aops = {
372 .readpages = omfs_readpages, 372 .readpages = omfs_readpages,
373 .writepage = omfs_writepage, 373 .writepage = omfs_writepage,
374 .writepages = omfs_writepages, 374 .writepages = omfs_writepages,
375 .sync_page = block_sync_page,
376 .write_begin = omfs_write_begin, 375 .write_begin = omfs_write_begin,
377 .write_end = generic_write_end, 376 .write_end = generic_write_end,
378 .bmap = omfs_bmap, 377 .bmap = omfs_bmap,
diff --git a/fs/open.c b/fs/open.c
index f83ca80cc59a..b52cf013ffa1 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -835,17 +835,8 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
835 835
836 validate_creds(cred); 836 validate_creds(cred);
837 837
838 /* 838 /* We must always pass in a valid mount pointer. */
839 * We must always pass in a valid mount pointer. Historically 839 BUG_ON(!mnt);
840 * callers got away with not passing it, but we must enforce this at
841 * the earliest possible point now to avoid strange problems deep in the
842 * filesystem stack.
843 */
844 if (!mnt) {
845 printk(KERN_WARNING "%s called with NULL vfsmount\n", __func__);
846 dump_stack();
847 return ERR_PTR(-EINVAL);
848 }
849 840
850 error = -ENFILE; 841 error = -ENFILE;
851 f = get_empty_filp(); 842 f = get_empty_filp();
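Demoting the NULL-vfsmount case from a recoverable warning to BUG_ON() makes the contract explicit: every caller must already hold both references before calling dentry_open(), which consumes them. A hypothetical caller under that contract (the wrapper name is illustrative):

    /* Hypothetical wrapper; dentry_open() consumes both references,
     * so they are taken up front with dget()/mntget(). */
    static struct file *open_dentry_readonly(struct dentry *dentry,
    					     struct vfsmount *mnt,
    					     const struct cred *cred)
    {
    	return dentry_open(dget(dentry), mntget(mnt), O_RDONLY, cred);
    }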
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 9c21119512b9..d545e97d99c3 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -290,7 +290,8 @@ ssize_t part_inflight_show(struct device *dev,
290{ 290{
291 struct hd_struct *p = dev_to_part(dev); 291 struct hd_struct *p = dev_to_part(dev);
292 292
293 return sprintf(buf, "%8u %8u\n", p->in_flight[0], p->in_flight[1]); 293 return sprintf(buf, "%8u %8u\n", atomic_read(&p->in_flight[0]),
294 atomic_read(&p->in_flight[1]));
294} 295}
295 296
296#ifdef CONFIG_FAIL_MAKE_REQUEST 297#ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -499,7 +500,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
499 /* everything is up and running, commence */ 500 /* everything is up and running, commence */
500 rcu_assign_pointer(ptbl->part[partno], p); 501 rcu_assign_pointer(ptbl->part[partno], p);
501 502
502 /* suppress uevent if the disk supresses it */ 503 /* suppress uevent if the disk suppresses it */
503 if (!dev_get_uevent_suppress(ddev)) 504 if (!dev_get_uevent_suppress(ddev))
504 kobject_uevent(&pdev->kobj, KOBJ_ADD); 505 kobject_uevent(&pdev->kobj, KOBJ_ADD);
505 506
@@ -584,7 +585,7 @@ rescan:
584 /* 585 /*
585 * If any partition code tried to read beyond EOD, try 586 * If any partition code tried to read beyond EOD, try
586 * unlocking native capacity even if partition table is 587 * unlocking native capacity even if partition table is
587 * sucessfully read as we could be missing some partitions. 588 * successfully read as we could be missing some partitions.
588 */ 589 */
589 if (state->access_beyond_eod) { 590 if (state->access_beyond_eod) {
590 printk(KERN_WARNING 591 printk(KERN_WARNING
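part_inflight_show() now reads the counters with atomic_read(), which implies hd_struct.in_flight became an atomic_t[2] elsewhere in this series. Under that assumption, the matching update helpers would reduce to plain atomic operations; this sketch omits the whole-disk part0 accounting the real include/linux/genhd.h helpers also perform.

    /* Simplified sketch; the real helpers also account the disk's part0. */
    static inline void part_inc_in_flight(struct hd_struct *part, int rw)
    {
    	atomic_inc(&part->in_flight[rw]);
    }

    static inline void part_dec_in_flight(struct hd_struct *part, int rw)
    {
    	atomic_dec(&part->in_flight[rw]);
    }

The payoff is that the hot I/O accounting path no longer needs an external lock just to keep these per-direction counters consistent.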
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index ea648b913beb..410df07ae96d 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -1299,6 +1299,11 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
1299 1299
1300 BUG_ON (!data || !frags); 1300 BUG_ON (!data || !frags);
1301 1301
1302 if (size < 2 * VBLK_SIZE_HEAD) {
1303 ldm_error("Value of size is too small.");
1304 return false;
1305 }
1306
1302 group = get_unaligned_be32(data + 0x08); 1307 group = get_unaligned_be32(data + 0x08);
1303 rec = get_unaligned_be16(data + 0x0C); 1308 rec = get_unaligned_be16(data + 0x0C);
1304 num = get_unaligned_be16(data + 0x0E); 1309 num = get_unaligned_be16(data + 0x0E);
@@ -1306,6 +1311,10 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
1306 ldm_error ("A VBLK claims to have %d parts.", num); 1311 ldm_error ("A VBLK claims to have %d parts.", num);
1307 return false; 1312 return false;
1308 } 1313 }
1314 if (rec >= num) {
1315 ldm_error("REC value (%d) exceeds NUM value (%d)", rec, num);
1316 return false;
1317 }
1309 1318
1310 list_for_each (item, frags) { 1319 list_for_each (item, frags) {
1311 f = list_entry (item, struct frag, list); 1320 f = list_entry (item, struct frag, list);
@@ -1334,10 +1343,9 @@ found:
1334 1343
1335 f->map |= (1 << rec); 1344 f->map |= (1 << rec);
1336 1345
1337 if (num > 0) { 1346 data += VBLK_SIZE_HEAD;
1338 data += VBLK_SIZE_HEAD; 1347 size -= VBLK_SIZE_HEAD;
1339 size -= VBLK_SIZE_HEAD; 1348
1340 }
1341 memcpy (f->data+rec*(size-VBLK_SIZE_HEAD)+VBLK_SIZE_HEAD, data, size); 1349 memcpy (f->data+rec*(size-VBLK_SIZE_HEAD)+VBLK_SIZE_HEAD, data, size);
1342 1350
1343 return true; 1351 return true;
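Both new checks guard the memcpy() at the bottom of ldm_frag_add(), whose destination offset is computed from rec and size in the final hunk. A hedged restatement of the invariants they establish; the f->data allocation is made earlier in the function, outside this hunk, so the exact buffer bound is an assumption:

    /* Invariants ldm_frag_add() now establishes before the copy (sketch):
     *
     *   size >= 2 * VBLK_SIZE_HEAD  => after the now-unconditional
     *                                  "size -= VBLK_SIZE_HEAD", the slot
     *                                  stride (size - VBLK_SIZE_HEAD) in
     *                                  the memcpy() offset is non-negative
     *   rec < num                   => rec selects one of the num slots the
     *                                  fragment buffer was sized for, so a
     *                                  crafted VBLK cannot write past it
     */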
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 7c99c1cf7e5c..5e4f776b0917 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -489,8 +489,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
489 vsize, 489 vsize,
490 mm ? get_mm_rss(mm) : 0, 490 mm ? get_mm_rss(mm) : 0,
491 rsslim, 491 rsslim,
492 mm ? mm->start_code : 0, 492 mm ? (permitted ? mm->start_code : 1) : 0,
493 mm ? mm->end_code : 0, 493 mm ? (permitted ? mm->end_code : 1) : 0,
494 (permitted && mm) ? mm->start_stack : 0, 494 (permitted && mm) ? mm->start_stack : 0,
495 esp, 495 esp,
496 eip, 496 eip,
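With this change an unprivileged reader of /proc/<pid>/stat sees 1 in the start_code and end_code fields instead of the real code-segment addresses, while 0 is kept for tasks without an mm, so kernel threads still read as before. A hypothetical consumer-side interpretation, inferred from the hunk above:

    /* Hypothetical helper for a /proc/<pid>/stat reader. */
    static const char *classify_code_field(unsigned long v)
    {
    	if (v == 0)
    		return "kernel thread (no mm)";
    	if (v == 1)
    		return "hidden (caller lacks ptrace permission)";
    	return "real address";
    }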
diff --git a/fs/proc/base.c b/fs/proc/base.c
index d49c4b5d2c3e..dfa532730e55 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -191,17 +191,20 @@ static int proc_root_link(struct inode *inode, struct path *path)
191 return result; 191 return result;
192} 192}
193 193
194/* 194static struct mm_struct *__check_mem_permission(struct task_struct *task)
195 * Return zero if current may access user memory in @task, -error if not.
196 */
197static int check_mem_permission(struct task_struct *task)
198{ 195{
196 struct mm_struct *mm;
197
198 mm = get_task_mm(task);
199 if (!mm)
200 return ERR_PTR(-EINVAL);
201
199 /* 202 /*
200 * A task can always look at itself, in case it chooses 203 * A task can always look at itself, in case it chooses
201 * to use system calls instead of load instructions. 204 * to use system calls instead of load instructions.
202 */ 205 */
203 if (task == current) 206 if (task == current)
204 return 0; 207 return mm;
205 208
206 /* 209 /*
207 * If current is actively ptrace'ing, and would also be 210 * If current is actively ptrace'ing, and would also be
@@ -213,27 +216,53 @@ static int check_mem_permission(struct task_struct *task)
213 match = (tracehook_tracer_task(task) == current); 216 match = (tracehook_tracer_task(task) == current);
214 rcu_read_unlock(); 217 rcu_read_unlock();
215 if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH)) 218 if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH))
216 return 0; 219 return mm;
217 } 220 }
218 221
219 /* 222 /*
220 * Noone else is allowed. 223 * No one else is allowed.
224 */
225 mmput(mm);
226 return ERR_PTR(-EPERM);
227}
228
229/*
230 * If current may access user memory in @task return a reference to the
231 * corresponding mm, otherwise ERR_PTR.
232 */
233static struct mm_struct *check_mem_permission(struct task_struct *task)
234{
235 struct mm_struct *mm;
236 int err;
237
238 /*
239 * Avoid racing if task exec's as we might get a new mm but validate
240 * against old credentials.
221 */ 241 */
222 return -EPERM; 242 err = mutex_lock_killable(&task->signal->cred_guard_mutex);
243 if (err)
244 return ERR_PTR(err);
245
246 mm = __check_mem_permission(task);
247 mutex_unlock(&task->signal->cred_guard_mutex);
248
249 return mm;
223} 250}
224 251
225struct mm_struct *mm_for_maps(struct task_struct *task) 252struct mm_struct *mm_for_maps(struct task_struct *task)
226{ 253{
227 struct mm_struct *mm; 254 struct mm_struct *mm;
255 int err;
228 256
229 if (mutex_lock_killable(&task->signal->cred_guard_mutex)) 257 err = mutex_lock_killable(&task->signal->cred_guard_mutex);
230 return NULL; 258 if (err)
259 return ERR_PTR(err);
231 260
232 mm = get_task_mm(task); 261 mm = get_task_mm(task);
233 if (mm && mm != current->mm && 262 if (mm && mm != current->mm &&
234 !ptrace_may_access(task, PTRACE_MODE_READ)) { 263 !ptrace_may_access(task, PTRACE_MODE_READ)) {
235 mmput(mm); 264 mmput(mm);
236 mm = NULL; 265 mm = ERR_PTR(-EACCES);
237 } 266 }
238 mutex_unlock(&task->signal->cred_guard_mutex); 267 mutex_unlock(&task->signal->cred_guard_mutex);
239 268
@@ -279,9 +308,9 @@ out:
279 308
280static int proc_pid_auxv(struct task_struct *task, char *buffer) 309static int proc_pid_auxv(struct task_struct *task, char *buffer)
281{ 310{
282 int res = 0; 311 struct mm_struct *mm = mm_for_maps(task);
283 struct mm_struct *mm = get_task_mm(task); 312 int res = PTR_ERR(mm);
284 if (mm) { 313 if (mm && !IS_ERR(mm)) {
285 unsigned int nwords = 0; 314 unsigned int nwords = 0;
286 do { 315 do {
287 nwords += 2; 316 nwords += 2;
@@ -318,6 +347,23 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer)
318} 347}
319#endif /* CONFIG_KALLSYMS */ 348#endif /* CONFIG_KALLSYMS */
320 349
350static int lock_trace(struct task_struct *task)
351{
352 int err = mutex_lock_killable(&task->signal->cred_guard_mutex);
353 if (err)
354 return err;
355 if (!ptrace_may_access(task, PTRACE_MODE_ATTACH)) {
356 mutex_unlock(&task->signal->cred_guard_mutex);
357 return -EPERM;
358 }
359 return 0;
360}
361
362static void unlock_trace(struct task_struct *task)
363{
364 mutex_unlock(&task->signal->cred_guard_mutex);
365}
366
321#ifdef CONFIG_STACKTRACE 367#ifdef CONFIG_STACKTRACE
322 368
323#define MAX_STACK_TRACE_DEPTH 64 369#define MAX_STACK_TRACE_DEPTH 64
@@ -327,6 +373,7 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
327{ 373{
328 struct stack_trace trace; 374 struct stack_trace trace;
329 unsigned long *entries; 375 unsigned long *entries;
376 int err;
330 int i; 377 int i;
331 378
332 entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL); 379 entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL);
@@ -337,15 +384,20 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
337 trace.max_entries = MAX_STACK_TRACE_DEPTH; 384 trace.max_entries = MAX_STACK_TRACE_DEPTH;
338 trace.entries = entries; 385 trace.entries = entries;
339 trace.skip = 0; 386 trace.skip = 0;
340 save_stack_trace_tsk(task, &trace);
341 387
342 for (i = 0; i < trace.nr_entries; i++) { 388 err = lock_trace(task);
343 seq_printf(m, "[<%p>] %pS\n", 389 if (!err) {
344 (void *)entries[i], (void *)entries[i]); 390 save_stack_trace_tsk(task, &trace);
391
392 for (i = 0; i < trace.nr_entries; i++) {
393 seq_printf(m, "[<%pK>] %pS\n",
394 (void *)entries[i], (void *)entries[i]);
395 }
396 unlock_trace(task);
345 } 397 }
346 kfree(entries); 398 kfree(entries);
347 399
348 return 0; 400 return err;
349} 401}
350#endif 402#endif
351 403
@@ -508,18 +560,22 @@ static int proc_pid_syscall(struct task_struct *task, char *buffer)
508{ 560{
509 long nr; 561 long nr;
510 unsigned long args[6], sp, pc; 562 unsigned long args[6], sp, pc;
563 int res = lock_trace(task);
564 if (res)
565 return res;
511 566
512 if (task_current_syscall(task, &nr, args, 6, &sp, &pc)) 567 if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
513 return sprintf(buffer, "running\n"); 568 res = sprintf(buffer, "running\n");
514 569 else if (nr < 0)
515 if (nr < 0) 570 res = sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
516 return sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc); 571 else
517 572 res = sprintf(buffer,
518 return sprintf(buffer,
519 "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", 573 "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
520 nr, 574 nr,
521 args[0], args[1], args[2], args[3], args[4], args[5], 575 args[0], args[1], args[2], args[3], args[4], args[5],
522 sp, pc); 576 sp, pc);
577 unlock_trace(task);
578 return res;
523} 579}
524#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ 580#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
525 581
@@ -775,18 +831,14 @@ static ssize_t mem_read(struct file * file, char __user * buf,
775 if (!task) 831 if (!task)
776 goto out_no_task; 832 goto out_no_task;
777 833
778 if (check_mem_permission(task))
779 goto out;
780
781 ret = -ENOMEM; 834 ret = -ENOMEM;
782 page = (char *)__get_free_page(GFP_TEMPORARY); 835 page = (char *)__get_free_page(GFP_TEMPORARY);
783 if (!page) 836 if (!page)
784 goto out; 837 goto out;
785 838
786 ret = 0; 839 mm = check_mem_permission(task);
787 840 ret = PTR_ERR(mm);
788 mm = get_task_mm(task); 841 if (IS_ERR(mm))
789 if (!mm)
790 goto out_free; 842 goto out_free;
791 843
792 ret = -EIO; 844 ret = -EIO;
@@ -800,8 +852,8 @@ static ssize_t mem_read(struct file * file, char __user * buf,
800 int this_len, retval; 852 int this_len, retval;
801 853
802 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; 854 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
803 retval = access_process_vm(task, src, page, this_len, 0); 855 retval = access_remote_vm(mm, src, page, this_len, 0);
804 if (!retval || check_mem_permission(task)) { 856 if (!retval) {
805 if (!ret) 857 if (!ret)
806 ret = -EIO; 858 ret = -EIO;
807 break; 859 break;
@@ -829,10 +881,6 @@ out_no_task:
829 return ret; 881 return ret;
830} 882}
831 883
832#define mem_write NULL
833
834#ifndef mem_write
835/* This is a security hazard */
836static ssize_t mem_write(struct file * file, const char __user *buf, 884static ssize_t mem_write(struct file * file, const char __user *buf,
837 size_t count, loff_t *ppos) 885 size_t count, loff_t *ppos)
838{ 886{
@@ -840,18 +888,25 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
840 char *page; 888 char *page;
841 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 889 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
842 unsigned long dst = *ppos; 890 unsigned long dst = *ppos;
891 struct mm_struct *mm;
843 892
844 copied = -ESRCH; 893 copied = -ESRCH;
845 if (!task) 894 if (!task)
846 goto out_no_task; 895 goto out_no_task;
847 896
848 if (check_mem_permission(task)) 897 mm = check_mem_permission(task);
849 goto out; 898 copied = PTR_ERR(mm);
899 if (IS_ERR(mm))
900 goto out_task;
901
902 copied = -EIO;
903 if (file->private_data != (void *)((long)current->self_exec_id))
904 goto out_mm;
850 905
851 copied = -ENOMEM; 906 copied = -ENOMEM;
852 page = (char *)__get_free_page(GFP_TEMPORARY); 907 page = (char *)__get_free_page(GFP_TEMPORARY);
853 if (!page) 908 if (!page)
854 goto out; 909 goto out_mm;
855 910
856 copied = 0; 911 copied = 0;
857 while (count > 0) { 912 while (count > 0) {
@@ -862,7 +917,7 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
862 copied = -EFAULT; 917 copied = -EFAULT;
863 break; 918 break;
864 } 919 }
865 retval = access_process_vm(task, dst, page, this_len, 1); 920 retval = access_remote_vm(mm, dst, page, this_len, 1);
866 if (!retval) { 921 if (!retval) {
867 if (!copied) 922 if (!copied)
868 copied = -EIO; 923 copied = -EIO;
@@ -875,12 +930,13 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
875 } 930 }
876 *ppos = dst; 931 *ppos = dst;
877 free_page((unsigned long) page); 932 free_page((unsigned long) page);
878out: 933out_mm:
934 mmput(mm);
935out_task:
879 put_task_struct(task); 936 put_task_struct(task);
880out_no_task: 937out_no_task:
881 return copied; 938 return copied;
882} 939}
883#endif
884 940
885loff_t mem_lseek(struct file *file, loff_t offset, int orig) 941loff_t mem_lseek(struct file *file, loff_t offset, int orig)
886{ 942{
@@ -917,20 +973,18 @@ static ssize_t environ_read(struct file *file, char __user *buf,
917 if (!task) 973 if (!task)
918 goto out_no_task; 974 goto out_no_task;
919 975
920 if (!ptrace_may_access(task, PTRACE_MODE_READ))
921 goto out;
922
923 ret = -ENOMEM; 976 ret = -ENOMEM;
924 page = (char *)__get_free_page(GFP_TEMPORARY); 977 page = (char *)__get_free_page(GFP_TEMPORARY);
925 if (!page) 978 if (!page)
926 goto out; 979 goto out;
927 980
928 ret = 0;
929 981
930 mm = get_task_mm(task); 982 mm = mm_for_maps(task);
931 if (!mm) 983 ret = PTR_ERR(mm);
984 if (!mm || IS_ERR(mm))
932 goto out_free; 985 goto out_free;
933 986
987 ret = 0;
934 while (count > 0) { 988 while (count > 0) {
935 int this_len, retval, max_len; 989 int this_len, retval, max_len;
936 990
@@ -2748,8 +2802,12 @@ static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
2748static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, 2802static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
2749 struct pid *pid, struct task_struct *task) 2803 struct pid *pid, struct task_struct *task)
2750{ 2804{
2751 seq_printf(m, "%08x\n", task->personality); 2805 int err = lock_trace(task);
2752 return 0; 2806 if (!err) {
2807 seq_printf(m, "%08x\n", task->personality);
2808 unlock_trace(task);
2809 }
2810 return err;
2753} 2811}
2754 2812
2755/* 2813/*
@@ -2768,7 +2826,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2768 REG("environ", S_IRUSR, proc_environ_operations), 2826 REG("environ", S_IRUSR, proc_environ_operations),
2769 INF("auxv", S_IRUSR, proc_pid_auxv), 2827 INF("auxv", S_IRUSR, proc_pid_auxv),
2770 ONE("status", S_IRUGO, proc_pid_status), 2828 ONE("status", S_IRUGO, proc_pid_status),
2771 ONE("personality", S_IRUSR, proc_pid_personality), 2829 ONE("personality", S_IRUGO, proc_pid_personality),
2772 INF("limits", S_IRUGO, proc_pid_limits), 2830 INF("limits", S_IRUGO, proc_pid_limits),
2773#ifdef CONFIG_SCHED_DEBUG 2831#ifdef CONFIG_SCHED_DEBUG
2774 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2832 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
@@ -2778,7 +2836,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2778#endif 2836#endif
2779 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), 2837 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
2780#ifdef CONFIG_HAVE_ARCH_TRACEHOOK 2838#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2781 INF("syscall", S_IRUSR, proc_pid_syscall), 2839 INF("syscall", S_IRUGO, proc_pid_syscall),
2782#endif 2840#endif
2783 INF("cmdline", S_IRUGO, proc_pid_cmdline), 2841 INF("cmdline", S_IRUGO, proc_pid_cmdline),
2784 ONE("stat", S_IRUGO, proc_tgid_stat), 2842 ONE("stat", S_IRUGO, proc_tgid_stat),
@@ -2797,7 +2855,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2797#ifdef CONFIG_PROC_PAGE_MONITOR 2855#ifdef CONFIG_PROC_PAGE_MONITOR
2798 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 2856 REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
2799 REG("smaps", S_IRUGO, proc_smaps_operations), 2857 REG("smaps", S_IRUGO, proc_smaps_operations),
2800 REG("pagemap", S_IRUSR, proc_pagemap_operations), 2858 REG("pagemap", S_IRUGO, proc_pagemap_operations),
2801#endif 2859#endif
2802#ifdef CONFIG_SECURITY 2860#ifdef CONFIG_SECURITY
2803 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 2861 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
@@ -2806,7 +2864,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2806 INF("wchan", S_IRUGO, proc_pid_wchan), 2864 INF("wchan", S_IRUGO, proc_pid_wchan),
2807#endif 2865#endif
2808#ifdef CONFIG_STACKTRACE 2866#ifdef CONFIG_STACKTRACE
2809 ONE("stack", S_IRUSR, proc_pid_stack), 2867 ONE("stack", S_IRUGO, proc_pid_stack),
2810#endif 2868#endif
2811#ifdef CONFIG_SCHEDSTATS 2869#ifdef CONFIG_SCHEDSTATS
2812 INF("schedstat", S_IRUGO, proc_pid_schedstat), 2870 INF("schedstat", S_IRUGO, proc_pid_schedstat),
@@ -3066,11 +3124,16 @@ static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldi
3066/* for the /proc/ directory itself, after non-process stuff has been done */ 3124/* for the /proc/ directory itself, after non-process stuff has been done */
3067int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) 3125int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
3068{ 3126{
3069 unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; 3127 unsigned int nr;
3070 struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode); 3128 struct task_struct *reaper;
3071 struct tgid_iter iter; 3129 struct tgid_iter iter;
3072 struct pid_namespace *ns; 3130 struct pid_namespace *ns;
3073 3131
3132 if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
3133 goto out_no_task;
3134 nr = filp->f_pos - FIRST_PROCESS_ENTRY;
3135
3136 reaper = get_proc_task(filp->f_path.dentry->d_inode);
3074 if (!reaper) 3137 if (!reaper)
3075 goto out_no_task; 3138 goto out_no_task;
3076 3139
@@ -3108,14 +3171,14 @@ static const struct pid_entry tid_base_stuff[] = {
3108 REG("environ", S_IRUSR, proc_environ_operations), 3171 REG("environ", S_IRUSR, proc_environ_operations),
3109 INF("auxv", S_IRUSR, proc_pid_auxv), 3172 INF("auxv", S_IRUSR, proc_pid_auxv),
3110 ONE("status", S_IRUGO, proc_pid_status), 3173 ONE("status", S_IRUGO, proc_pid_status),
3111 ONE("personality", S_IRUSR, proc_pid_personality), 3174 ONE("personality", S_IRUGO, proc_pid_personality),
3112 INF("limits", S_IRUGO, proc_pid_limits), 3175 INF("limits", S_IRUGO, proc_pid_limits),
3113#ifdef CONFIG_SCHED_DEBUG 3176#ifdef CONFIG_SCHED_DEBUG
3114 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 3177 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
3115#endif 3178#endif
3116 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), 3179 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
3117#ifdef CONFIG_HAVE_ARCH_TRACEHOOK 3180#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
3118 INF("syscall", S_IRUSR, proc_pid_syscall), 3181 INF("syscall", S_IRUGO, proc_pid_syscall),
3119#endif 3182#endif
3120 INF("cmdline", S_IRUGO, proc_pid_cmdline), 3183 INF("cmdline", S_IRUGO, proc_pid_cmdline),
3121 ONE("stat", S_IRUGO, proc_tid_stat), 3184 ONE("stat", S_IRUGO, proc_tid_stat),
@@ -3133,7 +3196,7 @@ static const struct pid_entry tid_base_stuff[] = {
3133#ifdef CONFIG_PROC_PAGE_MONITOR 3196#ifdef CONFIG_PROC_PAGE_MONITOR
3134 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 3197 REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
3135 REG("smaps", S_IRUGO, proc_smaps_operations), 3198 REG("smaps", S_IRUGO, proc_smaps_operations),
3136 REG("pagemap", S_IRUSR, proc_pagemap_operations), 3199 REG("pagemap", S_IRUGO, proc_pagemap_operations),
3137#endif 3200#endif
3138#ifdef CONFIG_SECURITY 3201#ifdef CONFIG_SECURITY
3139 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 3202 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
@@ -3142,7 +3205,7 @@ static const struct pid_entry tid_base_stuff[] = {
3142 INF("wchan", S_IRUGO, proc_pid_wchan), 3205 INF("wchan", S_IRUGO, proc_pid_wchan),
3143#endif 3206#endif
3144#ifdef CONFIG_STACKTRACE 3207#ifdef CONFIG_STACKTRACE
3145 ONE("stack", S_IRUSR, proc_pid_stack), 3208 ONE("stack", S_IRUGO, proc_pid_stack),
3146#endif 3209#endif
3147#ifdef CONFIG_SCHEDSTATS 3210#ifdef CONFIG_SCHEDSTATS
3148 INF("schedstat", S_IRUGO, proc_pid_schedstat), 3211 INF("schedstat", S_IRUGO, proc_pid_schedstat),
@@ -3161,7 +3224,7 @@ static const struct pid_entry tid_base_stuff[] = {
3161 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 3224 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
3162#ifdef CONFIG_AUDITSYSCALL 3225#ifdef CONFIG_AUDITSYSCALL
3163 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), 3226 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
3164 REG("sessionid", S_IRUSR, proc_sessionid_operations), 3227 REG("sessionid", S_IRUGO, proc_sessionid_operations),
3165#endif 3228#endif
3166#ifdef CONFIG_FAULT_INJECTION 3229#ifdef CONFIG_FAULT_INJECTION
3167 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), 3230 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
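The largest change in this file reworks check_mem_permission() to return the mm_struct it validated (or an ERR_PTR) instead of a bare status, so mem_read() and mem_write() operate via access_remote_vm() on exactly the mm they were granted access to, closing the window where the task could exec between the permission check and the access. A minimal sketch of a caller following the new convention (the helper itself is hypothetical):

    /* Hypothetical caller in the style mem_read() now uses. */
    static ssize_t read_remote_word(struct task_struct *task,
    				    unsigned long addr, unsigned long *out)
    {
    	struct mm_struct *mm = check_mem_permission(task);
    	int len;

    	if (IS_ERR(mm))
    		return PTR_ERR(mm);

    	/* The access targets the mm that was permission-checked, even
    	 * if the task exec()s concurrently. */
    	len = access_remote_vm(mm, addr, out, sizeof(*out), 0);
    	mmput(mm);

    	return len == sizeof(*out) ? 0 : -EIO;
    }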
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 01e07f2a188f..f1281339b6fa 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -28,7 +28,7 @@
28 28
29DEFINE_SPINLOCK(proc_subdir_lock); 29DEFINE_SPINLOCK(proc_subdir_lock);
30 30
31static int proc_match(int len, const char *name, struct proc_dir_entry *de) 31static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de)
32{ 32{
33 if (de->namelen != len) 33 if (de->namelen != len)
34 return 0; 34 return 0;
@@ -303,7 +303,7 @@ static int __xlate_proc_name(const char *name, struct proc_dir_entry **ret,
303{ 303{
304 const char *cp = name, *next; 304 const char *cp = name, *next;
305 struct proc_dir_entry *de; 305 struct proc_dir_entry *de;
306 int len; 306 unsigned int len;
307 307
308 de = *ret; 308 de = *ret;
309 if (!de) 309 if (!de)
@@ -602,7 +602,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
602{ 602{
603 struct proc_dir_entry *ent = NULL; 603 struct proc_dir_entry *ent = NULL;
604 const char *fn = name; 604 const char *fn = name;
605 int len; 605 unsigned int len;
606 606
607 /* make sure name is valid */ 607 /* make sure name is valid */
608 if (!name || !strlen(name)) goto out; 608 if (!name || !strlen(name)) goto out;
@@ -786,7 +786,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
786 struct proc_dir_entry **p; 786 struct proc_dir_entry **p;
787 struct proc_dir_entry *de = NULL; 787 struct proc_dir_entry *de = NULL;
788 const char *fn = name; 788 const char *fn = name;
789 int len; 789 unsigned int len;
790 790
791 spin_lock(&proc_subdir_lock); 791 spin_lock(&proc_subdir_lock);
792 if (__xlate_proc_name(name, &parent, &fn) != 0) { 792 if (__xlate_proc_name(name, &parent, &fn) != 0) {
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index d6a7ca1fdac5..d15aa1b1cc8f 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -46,8 +46,6 @@ static void proc_evict_inode(struct inode *inode)
46 } 46 }
47} 47}
48 48
49struct vfsmount *proc_mnt;
50
51static struct kmem_cache * proc_inode_cachep; 49static struct kmem_cache * proc_inode_cachep;
52 50
53static struct inode *proc_alloc_inode(struct super_block *sb) 51static struct inode *proc_alloc_inode(struct super_block *sb)
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 9ad561ded409..c03e8d3a3a5b 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -107,7 +107,6 @@ static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde)
107} 107}
108void pde_put(struct proc_dir_entry *pde); 108void pde_put(struct proc_dir_entry *pde);
109 109
110extern struct vfsmount *proc_mnt;
111int proc_fill_super(struct super_block *); 110int proc_fill_super(struct super_block *);
112struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); 111struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
113 112
diff --git a/fs/proc/root.c b/fs/proc/root.c
index ef9fa8e24ad6..a9000e9cfee5 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -43,17 +43,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
43 struct pid_namespace *ns; 43 struct pid_namespace *ns;
44 struct proc_inode *ei; 44 struct proc_inode *ei;
45 45
46 if (proc_mnt) {
47 /* Seed the root directory with a pid so it doesn't need
48 * to be special in base.c. I would do this earlier but
49 * the only task alive when /proc is mounted the first time
50 * is the init_task and it doesn't have any pids.
51 */
52 ei = PROC_I(proc_mnt->mnt_sb->s_root->d_inode);
53 if (!ei->pid)
54 ei->pid = find_get_pid(1);
55 }
56
57 if (flags & MS_KERNMOUNT) 46 if (flags & MS_KERNMOUNT)
58 ns = (struct pid_namespace *)data; 47 ns = (struct pid_namespace *)data;
59 else 48 else
@@ -71,16 +60,16 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
71 return ERR_PTR(err); 60 return ERR_PTR(err);
72 } 61 }
73 62
74 ei = PROC_I(sb->s_root->d_inode);
75 if (!ei->pid) {
76 rcu_read_lock();
77 ei->pid = get_pid(find_pid_ns(1, ns));
78 rcu_read_unlock();
79 }
80
81 sb->s_flags |= MS_ACTIVE; 63 sb->s_flags |= MS_ACTIVE;
82 } 64 }
83 65
66 ei = PROC_I(sb->s_root->d_inode);
67 if (!ei->pid) {
68 rcu_read_lock();
69 ei->pid = get_pid(find_pid_ns(1, ns));
70 rcu_read_unlock();
71 }
72
84 return dget(sb->s_root); 73 return dget(sb->s_root);
85} 74}
86 75
@@ -101,19 +90,20 @@ static struct file_system_type proc_fs_type = {
101 90
102void __init proc_root_init(void) 91void __init proc_root_init(void)
103{ 92{
93 struct vfsmount *mnt;
104 int err; 94 int err;
105 95
106 proc_init_inodecache(); 96 proc_init_inodecache();
107 err = register_filesystem(&proc_fs_type); 97 err = register_filesystem(&proc_fs_type);
108 if (err) 98 if (err)
109 return; 99 return;
110 proc_mnt = kern_mount_data(&proc_fs_type, &init_pid_ns); 100 mnt = kern_mount_data(&proc_fs_type, &init_pid_ns);
111 if (IS_ERR(proc_mnt)) { 101 if (IS_ERR(mnt)) {
112 unregister_filesystem(&proc_fs_type); 102 unregister_filesystem(&proc_fs_type);
113 return; 103 return;
114 } 104 }
115 105
116 init_pid_ns.proc_mnt = proc_mnt; 106 init_pid_ns.proc_mnt = mnt;
117 proc_symlink("mounts", NULL, "self/mounts"); 107 proc_symlink("mounts", NULL, "self/mounts");
118 108
119 proc_net_init(); 109 proc_net_init();
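Dropping the global proc_mnt completes the move to per-pid-namespace proc mounts: proc_root_init() now records the initial mount only in init_pid_ns.proc_mnt, and the pid-seeding of the root dentry moves into proc_mount() itself so every namespace's mount receives it. A hedged sketch of how a child namespace would obtain its own mount under this scheme; the real hook (pid_ns_prepare_proc()) may differ in detail:

    /* Hypothetical per-namespace mount helper mirroring proc_root_init(). */
    static int pid_ns_mount_proc(struct pid_namespace *ns)
    {
    	struct vfsmount *mnt = kern_mount_data(&proc_fs_type, ns);

    	if (IS_ERR(mnt))
    		return PTR_ERR(mnt);
    	ns->proc_mnt = mnt;	/* root-dir pid is seeded in proc_mount() */
    	return 0;
    }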
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 60b914860f81..2e7addfd9803 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1,5 +1,6 @@
1#include <linux/mm.h> 1#include <linux/mm.h>
2#include <linux/hugetlb.h> 2#include <linux/hugetlb.h>
3#include <linux/huge_mm.h>
3#include <linux/mount.h> 4#include <linux/mount.h>
4#include <linux/seq_file.h> 5#include <linux/seq_file.h>
5#include <linux/highmem.h> 6#include <linux/highmem.h>
@@ -7,6 +8,7 @@
7#include <linux/slab.h> 8#include <linux/slab.h>
8#include <linux/pagemap.h> 9#include <linux/pagemap.h>
9#include <linux/mempolicy.h> 10#include <linux/mempolicy.h>
11#include <linux/rmap.h>
10#include <linux/swap.h> 12#include <linux/swap.h>
11#include <linux/swapops.h> 13#include <linux/swapops.h>
12 14
@@ -119,14 +121,14 @@ static void *m_start(struct seq_file *m, loff_t *pos)
119 121
120 priv->task = get_pid_task(priv->pid, PIDTYPE_PID); 122 priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
121 if (!priv->task) 123 if (!priv->task)
122 return NULL; 124 return ERR_PTR(-ESRCH);
123 125
124 mm = mm_for_maps(priv->task); 126 mm = mm_for_maps(priv->task);
125 if (!mm) 127 if (!mm || IS_ERR(mm))
126 return NULL; 128 return mm;
127 down_read(&mm->mmap_sem); 129 down_read(&mm->mmap_sem);
128 130
129 tail_vma = get_gate_vma(priv->task); 131 tail_vma = get_gate_vma(priv->task->mm);
130 priv->tail_vma = tail_vma; 132 priv->tail_vma = tail_vma;
131 133
132 /* Start with last addr hint */ 134 /* Start with last addr hint */
@@ -180,7 +182,8 @@ static void m_stop(struct seq_file *m, void *v)
180 struct proc_maps_private *priv = m->private; 182 struct proc_maps_private *priv = m->private;
181 struct vm_area_struct *vma = v; 183 struct vm_area_struct *vma = v;
182 184
183 vma_stop(priv, vma); 185 if (!IS_ERR(vma))
186 vma_stop(priv, vma);
184 if (priv->task) 187 if (priv->task)
185 put_task_struct(priv->task); 188 put_task_struct(priv->task);
186} 189}
@@ -249,8 +252,8 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
249 const char *name = arch_vma_name(vma); 252 const char *name = arch_vma_name(vma);
250 if (!name) { 253 if (!name) {
251 if (mm) { 254 if (mm) {
252 if (vma->vm_start <= mm->start_brk && 255 if (vma->vm_start <= mm->brk &&
253 vma->vm_end >= mm->brk) { 256 vma->vm_end >= mm->start_brk) {
254 name = "[heap]"; 257 name = "[heap]";
255 } else if (vma->vm_start <= mm->start_stack && 258 } else if (vma->vm_start <= mm->start_stack &&
256 vma->vm_end >= mm->start_stack) { 259 vma->vm_end >= mm->start_stack) {
@@ -277,7 +280,8 @@ static int show_map(struct seq_file *m, void *v)
277 show_map_vma(m, vma); 280 show_map_vma(m, vma);
278 281
279 if (m->count < m->size) /* vma is copied successfully */ 282 if (m->count < m->size) /* vma is copied successfully */
280 m->version = (vma != get_gate_vma(task))? vma->vm_start: 0; 283 m->version = (vma != get_gate_vma(task->mm))
284 ? vma->vm_start : 0;
281 return 0; 285 return 0;
282} 286}
283 287
@@ -329,58 +333,86 @@ struct mem_size_stats {
329 unsigned long private_dirty; 333 unsigned long private_dirty;
330 unsigned long referenced; 334 unsigned long referenced;
331 unsigned long anonymous; 335 unsigned long anonymous;
336 unsigned long anonymous_thp;
332 unsigned long swap; 337 unsigned long swap;
333 u64 pss; 338 u64 pss;
334}; 339};
335 340
336static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, 341
337 struct mm_walk *walk) 342static void smaps_pte_entry(pte_t ptent, unsigned long addr,
343 unsigned long ptent_size, struct mm_walk *walk)
338{ 344{
339 struct mem_size_stats *mss = walk->private; 345 struct mem_size_stats *mss = walk->private;
340 struct vm_area_struct *vma = mss->vma; 346 struct vm_area_struct *vma = mss->vma;
341 pte_t *pte, ptent;
342 spinlock_t *ptl;
343 struct page *page; 347 struct page *page;
344 int mapcount; 348 int mapcount;
345 349
346 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 350 if (is_swap_pte(ptent)) {
347 for (; addr != end; pte++, addr += PAGE_SIZE) { 351 mss->swap += ptent_size;
348 ptent = *pte; 352 return;
349 353 }
350 if (is_swap_pte(ptent)) {
351 mss->swap += PAGE_SIZE;
352 continue;
353 }
354 354
355 if (!pte_present(ptent)) 355 if (!pte_present(ptent))
356 continue; 356 return;
357
358 page = vm_normal_page(vma, addr, ptent);
359 if (!page)
360 return;
361
362 if (PageAnon(page))
363 mss->anonymous += ptent_size;
364
365 mss->resident += ptent_size;
366 /* Accumulate the size in pages that have been accessed. */
367 if (pte_young(ptent) || PageReferenced(page))
368 mss->referenced += ptent_size;
369 mapcount = page_mapcount(page);
370 if (mapcount >= 2) {
371 if (pte_dirty(ptent) || PageDirty(page))
372 mss->shared_dirty += ptent_size;
373 else
374 mss->shared_clean += ptent_size;
375 mss->pss += (ptent_size << PSS_SHIFT) / mapcount;
376 } else {
377 if (pte_dirty(ptent) || PageDirty(page))
378 mss->private_dirty += ptent_size;
379 else
380 mss->private_clean += ptent_size;
381 mss->pss += (ptent_size << PSS_SHIFT);
382 }
383}
357 384
358 page = vm_normal_page(vma, addr, ptent); 385static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
359 if (!page) 386 struct mm_walk *walk)
360 continue; 387{
388 struct mem_size_stats *mss = walk->private;
389 struct vm_area_struct *vma = mss->vma;
390 pte_t *pte;
391 spinlock_t *ptl;
361 392
362 if (PageAnon(page)) 393 spin_lock(&walk->mm->page_table_lock);
363 mss->anonymous += PAGE_SIZE; 394 if (pmd_trans_huge(*pmd)) {
364 395 if (pmd_trans_splitting(*pmd)) {
365 mss->resident += PAGE_SIZE; 396 spin_unlock(&walk->mm->page_table_lock);
366 /* Accumulate the size in pages that have been accessed. */ 397 wait_split_huge_page(vma->anon_vma, pmd);
367 if (pte_young(ptent) || PageReferenced(page))
368 mss->referenced += PAGE_SIZE;
369 mapcount = page_mapcount(page);
370 if (mapcount >= 2) {
371 if (pte_dirty(ptent) || PageDirty(page))
372 mss->shared_dirty += PAGE_SIZE;
373 else
374 mss->shared_clean += PAGE_SIZE;
375 mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
376 } else { 398 } else {
377 if (pte_dirty(ptent) || PageDirty(page)) 399 smaps_pte_entry(*(pte_t *)pmd, addr,
378 mss->private_dirty += PAGE_SIZE; 400 HPAGE_PMD_SIZE, walk);
379 else 401 spin_unlock(&walk->mm->page_table_lock);
380 mss->private_clean += PAGE_SIZE; 402 mss->anonymous_thp += HPAGE_PMD_SIZE;
381 mss->pss += (PAGE_SIZE << PSS_SHIFT); 403 return 0;
382 } 404 }
405 } else {
406 spin_unlock(&walk->mm->page_table_lock);
383 } 407 }
408 /*
409 * The mmap_sem held all the way back in m_start() is what
410 * keeps khugepaged out of here and from collapsing things
411 * in here.
412 */
413 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
414 for (; addr != end; pte++, addr += PAGE_SIZE)
415 smaps_pte_entry(*pte, addr, PAGE_SIZE, walk);
384 pte_unmap_unlock(pte - 1, ptl); 416 pte_unmap_unlock(pte - 1, ptl);
385 cond_resched(); 417 cond_resched();
386 return 0; 418 return 0;
@@ -416,6 +448,7 @@ static int show_smap(struct seq_file *m, void *v)
416 "Private_Dirty: %8lu kB\n" 448 "Private_Dirty: %8lu kB\n"
417 "Referenced: %8lu kB\n" 449 "Referenced: %8lu kB\n"
418 "Anonymous: %8lu kB\n" 450 "Anonymous: %8lu kB\n"
451 "AnonHugePages: %8lu kB\n"
419 "Swap: %8lu kB\n" 452 "Swap: %8lu kB\n"
420 "KernelPageSize: %8lu kB\n" 453 "KernelPageSize: %8lu kB\n"
421 "MMUPageSize: %8lu kB\n" 454 "MMUPageSize: %8lu kB\n"
@@ -429,6 +462,7 @@ static int show_smap(struct seq_file *m, void *v)
429 mss.private_dirty >> 10, 462 mss.private_dirty >> 10,
430 mss.referenced >> 10, 463 mss.referenced >> 10,
431 mss.anonymous >> 10, 464 mss.anonymous >> 10,
465 mss.anonymous_thp >> 10,
432 mss.swap >> 10, 466 mss.swap >> 10,
433 vma_kernel_pagesize(vma) >> 10, 467 vma_kernel_pagesize(vma) >> 10,
434 vma_mmu_pagesize(vma) >> 10, 468 vma_mmu_pagesize(vma) >> 10,
@@ -436,7 +470,8 @@ static int show_smap(struct seq_file *m, void *v)
436 (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0); 470 (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0);
437 471
438 if (m->count < m->size) /* vma is copied successfully */ 472 if (m->count < m->size) /* vma is copied successfully */
439 m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0; 473 m->version = (vma != get_gate_vma(task->mm))
474 ? vma->vm_start : 0;
440 return 0; 475 return 0;
441} 476}
442 477
@@ -467,6 +502,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
467 spinlock_t *ptl; 502 spinlock_t *ptl;
468 struct page *page; 503 struct page *page;
469 504
505 split_huge_page_pmd(walk->mm, pmd);
506
470 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 507 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
471 for (; addr != end; pte++, addr += PAGE_SIZE) { 508 for (; addr != end; pte++, addr += PAGE_SIZE) {
472 ptent = *pte; 509 ptent = *pte;
@@ -623,6 +660,8 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
623 pte_t *pte; 660 pte_t *pte;
624 int err = 0; 661 int err = 0;
625 662
663 split_huge_page_pmd(walk->mm, pmd);
664
626 /* find the first VMA at or above 'addr' */ 665 /* find the first VMA at or above 'addr' */
627 vma = find_vma(walk->mm, addr); 666 vma = find_vma(walk->mm, addr);
628 for (; addr != end; addr += PAGE_SIZE) { 667 for (; addr != end; addr += PAGE_SIZE) {
@@ -728,8 +767,9 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
728 if (!task) 767 if (!task)
729 goto out; 768 goto out;
730 769
731 ret = -EACCES; 770 mm = mm_for_maps(task);
732 if (!ptrace_may_access(task, PTRACE_MODE_READ)) 771 ret = PTR_ERR(mm);
772 if (!mm || IS_ERR(mm))
733 goto out_task; 773 goto out_task;
734 774
735 ret = -EINVAL; 775 ret = -EINVAL;
@@ -742,10 +782,6 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
742 if (!count) 782 if (!count)
743 goto out_task; 783 goto out_task;
744 784
745 mm = get_task_mm(task);
746 if (!mm)
747 goto out_task;
748
749 pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); 785 pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
750 pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); 786 pm.buffer = kmalloc(pm.len, GFP_TEMPORARY);
751 ret = -ENOMEM; 787 ret = -ENOMEM;
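
The task_mmu.c changes above factor the per-pte accounting out into smaps_pte_entry() so that a transparent huge pmd can be charged in a single call with HPAGE_PMD_SIZE instead of page by page. A standalone userspace sketch of the shared/private/PSS rule it applies; PSS_SHIFT = 12 and the 2 MiB huge-page size are assumptions matching common configs, not values taken from this diff:

    #include <stdio.h>

    #define PSS_SHIFT 12    /* assumed to match fs/proc/task_mmu.c */

    struct mem_size_stats {
            unsigned long shared_clean, shared_dirty;
            unsigned long private_clean, private_dirty;
            unsigned long long pss;
    };

    static void account(struct mem_size_stats *mss, unsigned long size,
                        int mapcount, int dirty)
    {
            if (mapcount >= 2) {
                    if (dirty)
                            mss->shared_dirty += size;
                    else
                            mss->shared_clean += size;
                    /* each sharer is charged size/mapcount, in fixed point */
                    mss->pss += ((unsigned long long)size << PSS_SHIFT) / mapcount;
            } else {
                    if (dirty)
                            mss->private_dirty += size;
                    else
                            mss->private_clean += size;
                    mss->pss += (unsigned long long)size << PSS_SHIFT;
            }
    }

    int main(void)
    {
            struct mem_size_stats mss = { 0 };

            account(&mss, 4096, 3, 0);      /* 4 KiB pte shared three ways  */
            account(&mss, 2 << 20, 1, 1);   /* 2 MiB THP pmd, private+dirty */
            printf("Pss: %llu kB\n", mss.pss >> (10 + PSS_SHIFT));
            return 0;
    }

Running this prints "Pss: 2049 kB": the huge page is charged in full, while the page shared three ways contributes only a third of its size.
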
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index b535d3e5d5f1..980de547c070 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -199,13 +199,13 @@ static void *m_start(struct seq_file *m, loff_t *pos)
199 /* pin the task and mm whilst we play with them */ 199 /* pin the task and mm whilst we play with them */
200 priv->task = get_pid_task(priv->pid, PIDTYPE_PID); 200 priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
201 if (!priv->task) 201 if (!priv->task)
202 return NULL; 202 return ERR_PTR(-ESRCH);
203 203
204 mm = mm_for_maps(priv->task); 204 mm = mm_for_maps(priv->task);
205 if (!mm) { 205 if (!mm || IS_ERR(mm)) {
206 put_task_struct(priv->task); 206 put_task_struct(priv->task);
207 priv->task = NULL; 207 priv->task = NULL;
208 return NULL; 208 return mm;
209 } 209 }
210 down_read(&mm->mmap_sem); 210 down_read(&mm->mmap_sem);
211 211
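
Both m_start() conversions above lean on the seq_file contract: ->start() may return NULL (end of iteration), an ERR_PTR (abort with that error), or a real cursor, and ->stop() is handed whatever ->start() returned, which is why m_stop() now guards vma_stop(). A minimal userspace mock of the ERR_PTR helpers; demo_start/demo_stop are hypothetical names:

    #include <stdio.h>
    #include <errno.h>

    #define MAX_ERRNO       4095
    #define ERR_PTR(err)    ((void *)(long)(err))
    #define IS_ERR(ptr)     ((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

    static void *demo_start(int have_task)
    {
            if (!have_task)
                    return ERR_PTR(-ESRCH); /* reader sees -ESRCH */
            return NULL;                    /* normal end of iteration */
    }

    static void demo_stop(void *v)
    {
            if (!IS_ERR(v) && v)
                    printf("releasing cursor\n"); /* vma_stop() equivalent */
    }

    int main(void)
    {
            void *v = demo_start(0);

            demo_stop(v);           /* must not dereference an ERR_PTR */
            return IS_ERR(v) ? 1 : 0;
    }
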
diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
index 867d0ac026ce..8007ae7c0d8c 100644
--- a/fs/pstore/Kconfig
+++ b/fs/pstore/Kconfig
@@ -1,5 +1,5 @@
1config PSTORE 1config PSTORE
2 bool "Persistant store support" 2 bool "Persistent store support"
3 default n 3 default n
4 help 4 help
5 This option enables generic access to platform level 5 This option enables generic access to platform level
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 08342232cb1c..977ed2723845 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -27,6 +27,7 @@
27#include <linux/string.h> 27#include <linux/string.h>
28#include <linux/mount.h> 28#include <linux/mount.h>
29#include <linux/ramfs.h> 29#include <linux/ramfs.h>
30#include <linux/parser.h>
30#include <linux/sched.h> 31#include <linux/sched.h>
31#include <linux/magic.h> 32#include <linux/magic.h>
32#include <linux/pstore.h> 33#include <linux/pstore.h>
@@ -73,11 +74,16 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry)
73 struct pstore_private *p = dentry->d_inode->i_private; 74 struct pstore_private *p = dentry->d_inode->i_private;
74 75
75 p->erase(p->id); 76 p->erase(p->id);
76 kfree(p);
77 77
78 return simple_unlink(dir, dentry); 78 return simple_unlink(dir, dentry);
79} 79}
80 80
81static void pstore_evict_inode(struct inode *inode)
82{
83 end_writeback(inode);
84 kfree(inode->i_private);
85}
86
81static const struct inode_operations pstore_dir_inode_operations = { 87static const struct inode_operations pstore_dir_inode_operations = {
82 .lookup = simple_lookup, 88 .lookup = simple_lookup,
83 .unlink = pstore_unlink, 89 .unlink = pstore_unlink,
@@ -107,9 +113,52 @@ static struct inode *pstore_get_inode(struct super_block *sb,
107 return inode; 113 return inode;
108} 114}
109 115
116enum {
117 Opt_kmsg_bytes, Opt_err
118};
119
120static const match_table_t tokens = {
121 {Opt_kmsg_bytes, "kmsg_bytes=%u"},
122 {Opt_err, NULL}
123};
124
125static void parse_options(char *options)
126{
127 char *p;
128 substring_t args[MAX_OPT_ARGS];
129 int option;
130
131 if (!options)
132 return;
133
134 while ((p = strsep(&options, ",")) != NULL) {
135 int token;
136
137 if (!*p)
138 continue;
139
140 token = match_token(p, tokens, args);
141 switch (token) {
142 case Opt_kmsg_bytes:
143 if (!match_int(&args[0], &option))
144 pstore_set_kmsg_bytes(option);
145 break;
146 }
147 }
148}
149
150static int pstore_remount(struct super_block *sb, int *flags, char *data)
151{
152 parse_options(data);
153
154 return 0;
155}
156
110static const struct super_operations pstore_ops = { 157static const struct super_operations pstore_ops = {
111 .statfs = simple_statfs, 158 .statfs = simple_statfs,
112 .drop_inode = generic_delete_inode, 159 .drop_inode = generic_delete_inode,
160 .evict_inode = pstore_evict_inode,
161 .remount_fs = pstore_remount,
113 .show_options = generic_show_options, 162 .show_options = generic_show_options,
114}; 163};
115 164
@@ -209,6 +258,8 @@ int pstore_fill_super(struct super_block *sb, void *data, int silent)
209 sb->s_op = &pstore_ops; 258 sb->s_op = &pstore_ops;
210 sb->s_time_gran = 1; 259 sb->s_time_gran = 1;
211 260
261 parse_options(data);
262
212 inode = pstore_get_inode(sb, NULL, S_IFDIR | 0755, 0); 263 inode = pstore_get_inode(sb, NULL, S_IFDIR | 0755, 0);
213 if (!inode) { 264 if (!inode) {
214 err = -ENOMEM; 265 err = -ENOMEM;
@@ -252,28 +303,7 @@ static struct file_system_type pstore_fs_type = {
252 303
253static int __init init_pstore_fs(void) 304static int __init init_pstore_fs(void)
254{ 305{
255 int rc = 0; 306 return register_filesystem(&pstore_fs_type);
256 struct kobject *pstorefs_kobj;
257
258 pstorefs_kobj = kobject_create_and_add("pstore", fs_kobj);
259 if (!pstorefs_kobj) {
260 rc = -ENOMEM;
261 goto done;
262 }
263
264 rc = sysfs_create_file(pstorefs_kobj, &pstore_kmsg_bytes_attr.attr);
265 if (rc)
266 goto done1;
267
268 rc = register_filesystem(&pstore_fs_type);
269 if (rc == 0)
270 goto done;
271
272 sysfs_remove_file(pstorefs_kobj, &pstore_kmsg_bytes_attr.attr);
273done1:
274 kobject_put(pstorefs_kobj);
275done:
276 return rc;
277} 307}
278module_init(init_pstore_fs) 308module_init(init_pstore_fs)
279 309
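
pstore_fill_super() and the new remount hook both funnel the mount data string through parse_options(), replacing the removed sysfs knob. A userspace approximation of that pattern, splitting the comma-separated string with strsep(); plain sscanf() stands in here for the kernel's match_token()/match_int() pair, so this is a sketch rather than the in-kernel API:

    #define _DEFAULT_SOURCE         /* for strsep() on glibc */
    #include <stdio.h>
    #include <string.h>

    static unsigned long kmsg_bytes = 10240;

    static void parse_options(char *options)
    {
            char *p;

            if (!options)
                    return;

            while ((p = strsep(&options, ",")) != NULL) {
                    unsigned int val;

                    if (!*p)
                            continue;
                    /* stands in for match_token()/match_int() */
                    if (sscanf(p, "kmsg_bytes=%u", &val) == 1)
                            kmsg_bytes = val;  /* pstore_set_kmsg_bytes() */
            }
    }

    int main(void)
    {
            char opts[] = "kmsg_bytes=65536,ignored=1";

            parse_options(opts);
            printf("kmsg_bytes=%lu\n", kmsg_bytes); /* 65536 */
            return 0;
    }
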
diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h
index 76c26d2fab29..8c9f23eb1645 100644
--- a/fs/pstore/internal.h
+++ b/fs/pstore/internal.h
@@ -1,7 +1,6 @@
1extern void pstore_set_kmsg_bytes(int);
1extern void pstore_get_records(void); 2extern void pstore_get_records(void);
2extern int pstore_mkfile(enum pstore_type_id, char *psname, u64 id, 3extern int pstore_mkfile(enum pstore_type_id, char *psname, u64 id,
3 char *data, size_t size, 4 char *data, size_t size,
4 struct timespec time, int (*erase)(u64)); 5 struct timespec time, int (*erase)(u64));
5extern int pstore_is_mounted(void); 6extern int pstore_is_mounted(void);
6
7extern struct kobj_attribute pstore_kmsg_bytes_attr;
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 705fdf8abf6e..f835a25625ff 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -37,27 +37,21 @@
37static DEFINE_SPINLOCK(pstore_lock); 37static DEFINE_SPINLOCK(pstore_lock);
38static struct pstore_info *psinfo; 38static struct pstore_info *psinfo;
39 39
40/* How much of the console log to snapshot. /sys/fs/pstore/kmsg_bytes */ 40/* How much of the console log to snapshot */
41static unsigned long kmsg_bytes = 10240; 41static unsigned long kmsg_bytes = 10240;
42 42
43static ssize_t b_show(struct kobject *kobj, 43void pstore_set_kmsg_bytes(int bytes)
44 struct kobj_attribute *attr, char *buf)
45{ 44{
46 return snprintf(buf, PAGE_SIZE, "%lu\n", kmsg_bytes); 45 kmsg_bytes = bytes;
47} 46}
48 47
49static ssize_t b_store(struct kobject *kobj, struct kobj_attribute *attr,
50 const char *buf, size_t count)
51{
52 return (sscanf(buf, "%lu", &kmsg_bytes) > 0) ? count : 0;
53}
54
55struct kobj_attribute pstore_kmsg_bytes_attr =
56 __ATTR(kmsg_bytes, S_IRUGO | S_IWUSR, b_show, b_store);
57
58/* Tag each group of saved records with a sequence number */ 48/* Tag each group of saved records with a sequence number */
59static int oopscount; 49static int oopscount;
60 50
51static char *reason_str[] = {
52 "Oops", "Panic", "Kexec", "Restart", "Halt", "Poweroff", "Emergency"
53};
54
61/* 55/*
62 * callback from kmsg_dump. (s2,l2) has the most recently 56 * callback from kmsg_dump. (s2,l2) has the most recently
63 * written bytes, older bytes are in (s1,l1). Save as much 57 * written bytes, older bytes are in (s1,l1). Save as much
@@ -71,15 +65,20 @@ static void pstore_dump(struct kmsg_dumper *dumper,
71 unsigned long s1_start, s2_start; 65 unsigned long s1_start, s2_start;
72 unsigned long l1_cpy, l2_cpy; 66 unsigned long l1_cpy, l2_cpy;
73 unsigned long size, total = 0; 67 unsigned long size, total = 0;
74 char *dst; 68 char *dst, *why;
75 u64 id; 69 u64 id;
76 int hsize, part = 1; 70 int hsize, part = 1;
77 71
72 if (reason < ARRAY_SIZE(reason_str))
73 why = reason_str[reason];
74 else
75 why = "Unknown";
76
78 mutex_lock(&psinfo->buf_mutex); 77 mutex_lock(&psinfo->buf_mutex);
79 oopscount++; 78 oopscount++;
80 while (total < kmsg_bytes) { 79 while (total < kmsg_bytes) {
81 dst = psinfo->buf; 80 dst = psinfo->buf;
82 hsize = sprintf(dst, "Oops#%d Part%d\n", oopscount, part++); 81 hsize = sprintf(dst, "%s#%d Part%d\n", why, oopscount, part++);
83 size = psinfo->bufsize - hsize; 82 size = psinfo->bufsize - hsize;
84 dst += hsize; 83 dst += hsize;
85 84
@@ -96,7 +95,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
96 memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy); 95 memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy);
97 96
98 id = psinfo->write(PSTORE_TYPE_DMESG, hsize + l1_cpy + l2_cpy); 97 id = psinfo->write(PSTORE_TYPE_DMESG, hsize + l1_cpy + l2_cpy);
99 if (pstore_is_mounted()) 98 if (reason == KMSG_DUMP_OOPS && pstore_is_mounted())
100 pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, 99 pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id,
101 psinfo->buf, hsize + l1_cpy + l2_cpy, 100 psinfo->buf, hsize + l1_cpy + l2_cpy,
102 CURRENT_TIME, psinfo->erase); 101 CURRENT_TIME, psinfo->erase);
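
The platform.c change tags each saved record with the kmsg_dump reason instead of a hard-coded "Oops", and only creates pstore files for actual oopses. The bounded lookup reduces to the sketch below; that the kmsg_dump_reason enum values line up with reason_str in this order is an assumption here:

    #include <stdio.h>

    #define ARRAY_SIZE(a)   (sizeof(a) / sizeof((a)[0]))

    static const char * const reason_str[] = {
            "Oops", "Panic", "Kexec", "Restart", "Halt", "Poweroff", "Emergency"
    };

    static const char *dump_reason(unsigned int reason)
    {
            return reason < ARRAY_SIZE(reason_str) ? reason_str[reason]
                                                   : "Unknown";
    }

    int main(void)
    {
            printf("%s %s\n", dump_reason(1), dump_reason(42)); /* Panic Unknown */
            return 0;
    }
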
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index e63b4171d583..2b0646613f5a 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -335,7 +335,6 @@ static sector_t qnx4_bmap(struct address_space *mapping, sector_t block)
335static const struct address_space_operations qnx4_aops = { 335static const struct address_space_operations qnx4_aops = {
336 .readpage = qnx4_readpage, 336 .readpage = qnx4_readpage,
337 .writepage = qnx4_writepage, 337 .writepage = qnx4_writepage,
338 .sync_page = block_sync_page,
339 .write_begin = qnx4_write_begin, 338 .write_begin = qnx4_write_begin,
340 .write_end = generic_write_end, 339 .write_end = generic_write_end,
341 .bmap = qnx4_bmap 340 .bmap = qnx4_bmap
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index a2a622e079f0..d3c032f5fa0a 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -76,7 +76,7 @@
76#include <linux/buffer_head.h> 76#include <linux/buffer_head.h>
77#include <linux/capability.h> 77#include <linux/capability.h>
78#include <linux/quotaops.h> 78#include <linux/quotaops.h>
79#include <linux/writeback.h> /* for inode_lock, oddly enough.. */ 79#include "../internal.h" /* ugh */
80 80
81#include <asm/uaccess.h> 81#include <asm/uaccess.h>
82 82
@@ -442,7 +442,7 @@ EXPORT_SYMBOL(dquot_acquire);
442 */ 442 */
443int dquot_commit(struct dquot *dquot) 443int dquot_commit(struct dquot *dquot)
444{ 444{
445 int ret = 0, ret2 = 0; 445 int ret = 0;
446 struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); 446 struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
447 447
448 mutex_lock(&dqopt->dqio_mutex); 448 mutex_lock(&dqopt->dqio_mutex);
@@ -454,15 +454,10 @@ int dquot_commit(struct dquot *dquot)
454 spin_unlock(&dq_list_lock); 454 spin_unlock(&dq_list_lock);
455 /* Inactive dquot can be only if there was error during read/init 455 /* Inactive dquot can be only if there was error during read/init
456 * => we have better not writing it */ 456 * => we have better not writing it */
457 if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) { 457 if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
458 ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot); 458 ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot);
459 if (info_dirty(&dqopt->info[dquot->dq_type])) { 459 else
460 ret2 = dqopt->ops[dquot->dq_type]->write_file_info( 460 ret = -EIO;
461 dquot->dq_sb, dquot->dq_type);
462 }
463 if (ret >= 0)
464 ret = ret2;
465 }
466out_sem: 461out_sem:
467 mutex_unlock(&dqopt->dqio_mutex); 462 mutex_unlock(&dqopt->dqio_mutex);
468 return ret; 463 return ret;
@@ -900,33 +895,38 @@ static void add_dquot_ref(struct super_block *sb, int type)
900 int reserved = 0; 895 int reserved = 0;
901#endif 896#endif
902 897
903 spin_lock(&inode_lock); 898 spin_lock(&inode_sb_list_lock);
904 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 899 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
905 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) 900 spin_lock(&inode->i_lock);
901 if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
902 !atomic_read(&inode->i_writecount) ||
903 !dqinit_needed(inode, type)) {
904 spin_unlock(&inode->i_lock);
906 continue; 905 continue;
906 }
907#ifdef CONFIG_QUOTA_DEBUG 907#ifdef CONFIG_QUOTA_DEBUG
908 if (unlikely(inode_get_rsv_space(inode) > 0)) 908 if (unlikely(inode_get_rsv_space(inode) > 0))
909 reserved = 1; 909 reserved = 1;
910#endif 910#endif
911 if (!atomic_read(&inode->i_writecount))
912 continue;
913 if (!dqinit_needed(inode, type))
914 continue;
915
916 __iget(inode); 911 __iget(inode);
917 spin_unlock(&inode_lock); 912 spin_unlock(&inode->i_lock);
913 spin_unlock(&inode_sb_list_lock);
918 914
919 iput(old_inode); 915 iput(old_inode);
920 __dquot_initialize(inode, type); 916 __dquot_initialize(inode, type);
921 /* We hold a reference to 'inode' so it couldn't have been 917
922 * removed from s_inodes list while we dropped the inode_lock. 918 /*
923 * We cannot iput the inode now as we can be holding the last 919 * We hold a reference to 'inode' so it couldn't have been
924 * reference and we cannot iput it under inode_lock. So we 920 * removed from s_inodes list while we dropped the
925 * keep the reference and iput it later. */ 921 * inode_sb_list_lock. We cannot iput the inode now as we can be
922 * holding the last reference and we cannot iput it under
923 * inode_sb_list_lock. So we keep the reference and iput it
924 * later.
925 */
926 old_inode = inode; 926 old_inode = inode;
927 spin_lock(&inode_lock); 927 spin_lock(&inode_sb_list_lock);
928 } 928 }
929 spin_unlock(&inode_lock); 929 spin_unlock(&inode_sb_list_lock);
930 iput(old_inode); 930 iput(old_inode);
931 931
932#ifdef CONFIG_QUOTA_DEBUG 932#ifdef CONFIG_QUOTA_DEBUG
@@ -951,7 +951,7 @@ static inline int dqput_blocks(struct dquot *dquot)
951 951
952/* 952/*
953 * Remove references to dquots from inode and add dquot to list for freeing 953 * Remove references to dquots from inode and add dquot to list for freeing
954 * if we have the last referece to dquot 954 * if we have the last reference to dquot
955 * We can't race with anybody because we hold dqptr_sem for writing... 955 * We can't race with anybody because we hold dqptr_sem for writing...
956 */ 956 */
957static int remove_inode_dquot_ref(struct inode *inode, int type, 957static int remove_inode_dquot_ref(struct inode *inode, int type,
@@ -1007,7 +1007,7 @@ static void remove_dquot_ref(struct super_block *sb, int type,
1007 struct inode *inode; 1007 struct inode *inode;
1008 int reserved = 0; 1008 int reserved = 0;
1009 1009
1010 spin_lock(&inode_lock); 1010 spin_lock(&inode_sb_list_lock);
1011 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 1011 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
1012 /* 1012 /*
1013 * We have to scan also I_NEW inodes because they can already 1013 * We have to scan also I_NEW inodes because they can already
@@ -1021,7 +1021,7 @@ static void remove_dquot_ref(struct super_block *sb, int type,
1021 remove_inode_dquot_ref(inode, type, tofree_head); 1021 remove_inode_dquot_ref(inode, type, tofree_head);
1022 } 1022 }
1023 } 1023 }
1024 spin_unlock(&inode_lock); 1024 spin_unlock(&inode_sb_list_lock);
1025#ifdef CONFIG_QUOTA_DEBUG 1025#ifdef CONFIG_QUOTA_DEBUG
1026 if (reserved) { 1026 if (reserved) {
1027 printk(KERN_WARNING "VFS (%s): Writes happened after quota" 1027 printk(KERN_WARNING "VFS (%s): Writes happened after quota"
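
The add_dquot_ref() rework keeps the long-standing reference dance while swapping inode_lock for inode_sb_list_lock: pin the inode while the list lock is held, drop the lock, and only then release the reference taken on the previous iteration, because the final iput() can sleep and must not run under a spinlock. A single-threaded sketch of just that ordering, with the locking stubbed out as comments:

    #include <stdio.h>

    struct inode { int refcount; int id; };

    static void iget(struct inode *i)
    {
            i->refcount++;
    }

    static void iput(struct inode *i)
    {
            if (!i)
                    return;
            i->refcount--;
            printf("iput(inode %d) with the list lock dropped\n", i->id);
    }

    int main(void)
    {
            struct inode inodes[3] = { {1, 0}, {1, 1}, {1, 2} };
            struct inode *old = NULL;
            int n;

            /* spin_lock(&inode_sb_list_lock); */
            for (n = 0; n < 3; n++) {
                    iget(&inodes[n]);  /* pin while the lock is held */
                    /* spin_unlock(&inode_sb_list_lock); */
                    iput(old);         /* may sleep: no lock held here */
                    printf("initialize quota for inode %d\n", inodes[n].id);
                    old = &inodes[n];
                    /* spin_lock(&inode_sb_list_lock); */
            }
            /* spin_unlock(&inode_sb_list_lock); */
            iput(old);                 /* drop the final reference */
            return 0;
    }
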
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 9eead2c796b7..fbb0b478a346 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -112,6 +112,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
112 SetPageDirty(page); 112 SetPageDirty(page);
113 113
114 unlock_page(page); 114 unlock_page(page);
115 put_page(page);
115 } 116 }
116 117
117 return 0; 118 return 0;
diff --git a/fs/reiserfs/Makefile b/fs/reiserfs/Makefile
index 792b3cb2cd18..3c3b00165114 100644
--- a/fs/reiserfs/Makefile
+++ b/fs/reiserfs/Makefile
@@ -31,9 +31,7 @@ endif
31# and causing a panic. Since this behavior only affects ppc32, this ifeq 31# and causing a panic. Since this behavior only affects ppc32, this ifeq
32# will work around it. If any other architecture displays this behavior, 32# will work around it. If any other architecture displays this behavior,
33# add it here. 33# add it here.
34ifeq ($(CONFIG_PPC32),y) 34ccflags-$(CONFIG_PPC32) := $(call cc-ifversion, -lt, 0400, -O1)
35EXTRA_CFLAGS := $(call cc-ifversion, -lt, 0400, -O1)
36endif
37 35
38TAGS: 36TAGS:
39 etags *.c 37 etags *.c
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 1bba24bad820..4fd5bb33dbb5 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3217,7 +3217,6 @@ const struct address_space_operations reiserfs_address_space_operations = {
3217 .readpages = reiserfs_readpages, 3217 .readpages = reiserfs_readpages,
3218 .releasepage = reiserfs_releasepage, 3218 .releasepage = reiserfs_releasepage,
3219 .invalidatepage = reiserfs_invalidatepage, 3219 .invalidatepage = reiserfs_invalidatepage,
3220 .sync_page = block_sync_page,
3221 .write_begin = reiserfs_write_begin, 3220 .write_begin = reiserfs_write_begin,
3222 .write_end = reiserfs_write_end, 3221 .write_end = reiserfs_write_end,
3223 .bmap = reiserfs_aop_bmap, 3222 .bmap = reiserfs_aop_bmap,
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 79265fdc317a..4e153051bc75 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -59,7 +59,7 @@ long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
59 if (err) 59 if (err)
60 break; 60 break;
61 61
62 if (!is_owner_or_cap(inode)) { 62 if (!inode_owner_or_capable(inode)) {
63 err = -EPERM; 63 err = -EPERM;
64 goto setflags_out; 64 goto setflags_out;
65 } 65 }
@@ -103,7 +103,7 @@ setflags_out:
103 err = put_user(inode->i_generation, (int __user *)arg); 103 err = put_user(inode->i_generation, (int __user *)arg);
104 break; 104 break;
105 case REISERFS_IOC_SETVERSION: 105 case REISERFS_IOC_SETVERSION:
106 if (!is_owner_or_cap(inode)) { 106 if (!inode_owner_or_capable(inode)) {
107 err = -EPERM; 107 err = -EPERM;
108 break; 108 break;
109 } 109 }
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index c77514bd5776..c5e82ece7c6c 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1,7 +1,7 @@
1/* 1/*
2** Write ahead logging implementation copyright Chris Mason 2000 2** Write ahead logging implementation copyright Chris Mason 2000
3** 3**
4** The background commits make this code very interelated, and 4** The background commits make this code very interrelated, and
5** overly complex. I need to rethink things a bit....The major players: 5** overly complex. I need to rethink things a bit....The major players:
6** 6**
7** journal_begin -- call with the number of blocks you expect to log. 7** journal_begin -- call with the number of blocks you expect to log.
@@ -2725,7 +2725,7 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2725 REISERFS_DISK_OFFSET_IN_BYTES / 2725 REISERFS_DISK_OFFSET_IN_BYTES /
2726 sb->s_blocksize + 2); 2726 sb->s_blocksize + 2);
2727 2727
2728 /* Sanity check to see is the standard journal fitting withing first bitmap 2728 /* Sanity check to see is the standard journal fitting within first bitmap
2729 (actual for small blocksizes) */ 2729 (actual for small blocksizes) */
2730 if (!SB_ONDISK_JOURNAL_DEVICE(sb) && 2730 if (!SB_ONDISK_JOURNAL_DEVICE(sb) &&
2731 (SB_JOURNAL_1st_RESERVED_BLOCK(sb) + 2731 (SB_JOURNAL_1st_RESERVED_BLOCK(sb) +
diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c
index b87aa2c1afc1..7df1ce48203a 100644
--- a/fs/reiserfs/lock.c
+++ b/fs/reiserfs/lock.c
@@ -15,7 +15,7 @@
15 * for this mutex, no need for a system wide mutex facility. 15 * for this mutex, no need for a system wide mutex facility.
16 * 16 *
17 * Also this lock is often released before a call that could block because 17 * Also this lock is often released before a call that could block because
18 * reiserfs performances were partialy based on the release while schedule() 18 * reiserfs performances were partially based on the release while schedule()
19 * property of the Bkl. 19 * property of the Bkl.
20 */ 20 */
21void reiserfs_write_lock(struct super_block *s) 21void reiserfs_write_lock(struct super_block *s)
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 0aab04f46827..b216ff6be1c9 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -393,7 +393,7 @@ void add_save_link(struct reiserfs_transaction_handle *th,
393 /* body of "save" link */ 393 /* body of "save" link */
394 link = INODE_PKEY(inode)->k_dir_id; 394 link = INODE_PKEY(inode)->k_dir_id;
395 395
396 /* put "save" link inot tree, don't charge quota to anyone */ 396 /* put "save" link into tree, don't charge quota to anyone */
397 retval = 397 retval =
398 reiserfs_insert_item(th, &path, &key, &ih, NULL, (char *)&link); 398 reiserfs_insert_item(th, &path, &key, &ih, NULL, (char *)&link);
399 if (retval) { 399 if (retval) {
@@ -2104,7 +2104,7 @@ out:
2104 2104
2105/* Read data from quotafile - avoid pagecache and such because we cannot afford 2105/* Read data from quotafile - avoid pagecache and such because we cannot afford
2106 * acquiring the locks... As quota files are never truncated and quota code 2106 * acquiring the locks... As quota files are never truncated and quota code
2107 * itself serializes the operations (and noone else should touch the files) 2107 * itself serializes the operations (and no one else should touch the files)
2108 * we don't have to be afraid of races */ 2108 * we don't have to be afraid of races */
2109static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data, 2109static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data,
2110 size_t len, loff_t off) 2110 size_t len, loff_t off)
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 5c11ca82b782..47d2a4498b03 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -396,7 +396,7 @@ static struct page *reiserfs_get_page(struct inode *dir, size_t n)
396 struct address_space *mapping = dir->i_mapping; 396 struct address_space *mapping = dir->i_mapping;
397 struct page *page; 397 struct page *page;
398 /* We can deadlock if we try to free dentries, 398 /* We can deadlock if we try to free dentries,
399 and an unlink/rmdir has just occured - GFP_NOFS avoids this */ 399 and an unlink/rmdir has just occurred - GFP_NOFS avoids this */
400 mapping_set_gfp_mask(mapping, GFP_NOFS); 400 mapping_set_gfp_mask(mapping, GFP_NOFS);
401 page = read_mapping_page(mapping, n >> PAGE_CACHE_SHIFT, NULL); 401 page = read_mapping_page(mapping, n >> PAGE_CACHE_SHIFT, NULL);
402 if (!IS_ERR(page)) { 402 if (!IS_ERR(page)) {
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 90d2fcb67a31..3dc38f1206fc 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -26,7 +26,7 @@ posix_acl_set(struct dentry *dentry, const char *name, const void *value,
26 size_t jcreate_blocks; 26 size_t jcreate_blocks;
27 if (!reiserfs_posixacl(inode->i_sb)) 27 if (!reiserfs_posixacl(inode->i_sb))
28 return -EOPNOTSUPP; 28 return -EOPNOTSUPP;
29 if (!is_owner_or_cap(inode)) 29 if (!inode_owner_or_capable(inode))
30 return -EPERM; 30 return -EPERM;
31 31
32 if (value) { 32 if (value) {
diff --git a/fs/select.c b/fs/select.c
index e56560d2b08a..d33418fdc858 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -517,9 +517,6 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
517 * Update: ERESTARTSYS breaks at least the xview clock binary, so 517 * Update: ERESTARTSYS breaks at least the xview clock binary, so
518 * I'm trying ERESTARTNOHAND which restart only when you want to. 518 * I'm trying ERESTARTNOHAND which restart only when you want to.
519 */ 519 */
520#define MAX_SELECT_SECONDS \
521 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
522
523int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, 520int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
524 fd_set __user *exp, struct timespec *end_time) 521 fd_set __user *exp, struct timespec *end_time)
525{ 522{
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index aa68a8a31518..efc309fa3035 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -5,12 +5,12 @@ config SQUASHFS
5 help 5 help
6 Saying Y here includes support for SquashFS 4.0 (a Compressed 6 Saying Y here includes support for SquashFS 4.0 (a Compressed
7 Read-Only File System). Squashfs is a highly compressed read-only 7 Read-Only File System). Squashfs is a highly compressed read-only
8 filesystem for Linux. It uses zlib/lzo compression to compress both 8 filesystem for Linux. It uses zlib, lzo or xz compression to
9 files, inodes and directories. Inodes in the system are very small 9 compress both files, inodes and directories. Inodes in the system
10 and all blocks are packed to minimise data overhead. Block sizes 10 are very small and all blocks are packed to minimise data overhead.
11 greater than 4K are supported up to a maximum of 1 Mbytes (default 11 Block sizes greater than 4K are supported up to a maximum of 1 Mbytes
12 block size 128K). SquashFS 4.0 supports 64 bit filesystems and files 12 (default block size 128K). SquashFS 4.0 supports 64 bit filesystems
13 (larger than 4GB), full uid/gid information, hard links and 13 and files (larger than 4GB), full uid/gid information, hard links and
14 timestamps. 14 timestamps.
15 15
16 Squashfs is intended for general read-only filesystem use, for 16 Squashfs is intended for general read-only filesystem use, for
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index 26b15ae34d6f..c37b520132ff 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -104,7 +104,7 @@ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb,
104 entry = &cache->entry[i]; 104 entry = &cache->entry[i];
105 105
106 /* 106 /*
107 * Initialise choosen cache entry, and fill it in from 107 * Initialise chosen cache entry, and fill it in from
108 * disk. 108 * disk.
109 */ 109 */
110 cache->unused--; 110 cache->unused--;
@@ -286,7 +286,7 @@ cleanup:
286 286
287 287
288/* 288/*
289 * Copy upto length bytes from cache entry to buffer starting at offset bytes 289 * Copy up to length bytes from cache entry to buffer starting at offset bytes
290 * into the cache entry. If there's not length bytes then copy the number of 290 * into the cache entry. If there's not length bytes then copy the number of
291 * bytes available. In all cases return the number of bytes copied. 291 * bytes available. In all cases return the number of bytes copied.
292 */ 292 */
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
index a5940e54c4dd..e921bd213738 100644
--- a/fs/squashfs/decompressor.c
+++ b/fs/squashfs/decompressor.c
@@ -23,6 +23,7 @@
23 23
24#include <linux/types.h> 24#include <linux/types.h>
25#include <linux/mutex.h> 25#include <linux/mutex.h>
26#include <linux/slab.h>
26#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
27 28
28#include "squashfs_fs.h" 29#include "squashfs_fs.h"
@@ -74,3 +75,36 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id)
74 75
75 return decompressor[i]; 76 return decompressor[i];
76} 77}
78
79
80void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags)
81{
82 struct squashfs_sb_info *msblk = sb->s_fs_info;
83 void *strm, *buffer = NULL;
84 int length = 0;
85
86 /*
87 * Read decompressor specific options from file system if present
88 */
89 if (SQUASHFS_COMP_OPTS(flags)) {
90 buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
91 if (buffer == NULL)
92 return ERR_PTR(-ENOMEM);
93
94 length = squashfs_read_data(sb, &buffer,
95 sizeof(struct squashfs_super_block), 0, NULL,
96 PAGE_CACHE_SIZE, 1);
97
98 if (length < 0) {
99 strm = ERR_PTR(length);
100 goto finished;
101 }
102 }
103
104 strm = msblk->decompressor->init(msblk, buffer, length);
105
106finished:
107 kfree(buffer);
108
109 return strm;
110}
diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h
index 3b305a70f7aa..099745ad5691 100644
--- a/fs/squashfs/decompressor.h
+++ b/fs/squashfs/decompressor.h
@@ -24,7 +24,7 @@
24 */ 24 */
25 25
26struct squashfs_decompressor { 26struct squashfs_decompressor {
27 void *(*init)(struct squashfs_sb_info *); 27 void *(*init)(struct squashfs_sb_info *, void *, int);
28 void (*free)(void *); 28 void (*free)(void *);
29 int (*decompress)(struct squashfs_sb_info *, void **, 29 int (*decompress)(struct squashfs_sb_info *, void **,
30 struct buffer_head **, int, int, int, int, int); 30 struct buffer_head **, int, int, int, int, int);
@@ -33,11 +33,6 @@ struct squashfs_decompressor {
33 int supported; 33 int supported;
34}; 34};
35 35
36static inline void *squashfs_decompressor_init(struct squashfs_sb_info *msblk)
37{
38 return msblk->decompressor->init(msblk);
39}
40
41static inline void squashfs_decompressor_free(struct squashfs_sb_info *msblk, 36static inline void squashfs_decompressor_free(struct squashfs_sb_info *msblk,
42 void *s) 37 void *s)
43{ 38{
diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c
index 0dc340aa2be9..3f79cd1d0c19 100644
--- a/fs/squashfs/dir.c
+++ b/fs/squashfs/dir.c
@@ -172,6 +172,11 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir)
172 length += sizeof(dirh); 172 length += sizeof(dirh);
173 173
174 dir_count = le32_to_cpu(dirh.count) + 1; 174 dir_count = le32_to_cpu(dirh.count) + 1;
175
176 /* dir_count should never be larger than 256 */
177 if (dir_count > 256)
178 goto failed_read;
179
175 while (dir_count--) { 180 while (dir_count--) {
176 /* 181 /*
177 * Read directory entry. 182 * Read directory entry.
@@ -183,6 +188,10 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir)
183 188
184 size = le16_to_cpu(dire->size) + 1; 189 size = le16_to_cpu(dire->size) + 1;
185 190
191 /* size should never be larger than SQUASHFS_NAME_LEN */
192 if (size > SQUASHFS_NAME_LEN)
193 goto failed_read;
194
186 err = squashfs_read_metadata(inode->i_sb, dire->name, 195 err = squashfs_read_metadata(inode->i_sb, dire->name,
187 &block, &offset, size); 196 &block, &offset, size);
188 if (err < 0) 197 if (err < 0)
diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c
index 7da759e34c52..00f4dfc5f088 100644
--- a/fs/squashfs/lzo_wrapper.c
+++ b/fs/squashfs/lzo_wrapper.c
@@ -37,7 +37,7 @@ struct squashfs_lzo {
37 void *output; 37 void *output;
38}; 38};
39 39
40static void *lzo_init(struct squashfs_sb_info *msblk) 40static void *lzo_init(struct squashfs_sb_info *msblk, void *buff, int len)
41{ 41{
42 int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE); 42 int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE);
43 43
@@ -58,7 +58,7 @@ failed2:
58failed: 58failed:
59 ERROR("Failed to allocate lzo workspace\n"); 59 ERROR("Failed to allocate lzo workspace\n");
60 kfree(stream); 60 kfree(stream);
61 return NULL; 61 return ERR_PTR(-ENOMEM);
62} 62}
63 63
64 64
diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c
index 7a9464d08cf6..5d922a6701ab 100644
--- a/fs/squashfs/namei.c
+++ b/fs/squashfs/namei.c
@@ -176,6 +176,11 @@ static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry,
176 length += sizeof(dirh); 176 length += sizeof(dirh);
177 177
178 dir_count = le32_to_cpu(dirh.count) + 1; 178 dir_count = le32_to_cpu(dirh.count) + 1;
179
180 /* dir_count should never be larger than 256 */
181 if (dir_count > 256)
182 goto data_error;
183
179 while (dir_count--) { 184 while (dir_count--) {
180 /* 185 /*
181 * Read directory entry. 186 * Read directory entry.
@@ -187,6 +192,10 @@ static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry,
187 192
188 size = le16_to_cpu(dire->size) + 1; 193 size = le16_to_cpu(dire->size) + 1;
189 194
195 /* size should never be larger than SQUASHFS_NAME_LEN */
196 if (size > SQUASHFS_NAME_LEN)
197 goto data_error;
198
190 err = squashfs_read_metadata(dir->i_sb, dire->name, 199 err = squashfs_read_metadata(dir->i_sb, dire->name,
191 &block, &offset, size); 200 &block, &offset, size);
192 if (err < 0) 201 if (err < 0)
@@ -228,6 +237,9 @@ exit_lookup:
228 d_add(dentry, inode); 237 d_add(dentry, inode);
229 return ERR_PTR(0); 238 return ERR_PTR(0);
230 239
240data_error:
241 err = -EIO;
242
231read_failure: 243read_failure:
232 ERROR("Unable to read directory block [%llx:%x]\n", 244 ERROR("Unable to read directory block [%llx:%x]\n",
233 squashfs_i(dir)->start + msblk->directory_table, 245 squashfs_i(dir)->start + msblk->directory_table,
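
The dir.c and namei.c hunks above add the same pair of sanity checks because both values come straight from on-disk, i.e. untrusted, metadata. Reduced to a standalone sketch; SQUASHFS_NAME_LEN being 256 is taken as an assumption from headers of that era:

    #include <stdio.h>

    #define SQUASHFS_NAME_LEN 256   /* assumed value from squashfs_fs.h */

    static int dir_data_ok(unsigned int dir_count, unsigned int name_size)
    {
            if (dir_count > 256)                    /* corrupt header */
                    return 0;
            if (name_size > SQUASHFS_NAME_LEN)      /* corrupt entry */
                    return 0;
            return 1;
    }

    int main(void)
    {
            printf("%d %d\n", dir_data_ok(12, 32),          /* 1 */
                   dir_data_ok(100000, 32));                /* 0 */
            return 0;
    }
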
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index ba729d808876..1f2e608b8785 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -48,6 +48,7 @@ extern int squashfs_read_table(struct super_block *, void *, u64, int);
48 48
49/* decompressor.c */ 49/* decompressor.c */
50extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int); 50extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int);
51extern void *squashfs_decompressor_init(struct super_block *, unsigned short);
51 52
52/* export.c */ 53/* export.c */
53extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, 54extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64,
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index 39533feffd6d..4582c568ef4d 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -57,6 +57,7 @@
57#define SQUASHFS_ALWAYS_FRAG 5 57#define SQUASHFS_ALWAYS_FRAG 5
58#define SQUASHFS_DUPLICATE 6 58#define SQUASHFS_DUPLICATE 6
59#define SQUASHFS_EXPORT 7 59#define SQUASHFS_EXPORT 7
60#define SQUASHFS_COMP_OPT 10
60 61
61#define SQUASHFS_BIT(flag, bit) ((flag >> bit) & 1) 62#define SQUASHFS_BIT(flag, bit) ((flag >> bit) & 1)
62 63
@@ -81,6 +82,9 @@
81#define SQUASHFS_EXPORTABLE(flags) SQUASHFS_BIT(flags, \ 82#define SQUASHFS_EXPORTABLE(flags) SQUASHFS_BIT(flags, \
82 SQUASHFS_EXPORT) 83 SQUASHFS_EXPORT)
83 84
85#define SQUASHFS_COMP_OPTS(flags) SQUASHFS_BIT(flags, \
86 SQUASHFS_COMP_OPT)
87
84/* Max number of types and file types */ 88/* Max number of types and file types */
85#define SQUASHFS_DIR_TYPE 1 89#define SQUASHFS_DIR_TYPE 1
86#define SQUASHFS_REG_TYPE 2 90#define SQUASHFS_REG_TYPE 2
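
The new SQUASHFS_COMP_OPTS() test is just SQUASHFS_BIT() applied to bit 10 of the superblock flags word. A worked example (the extra parentheses around the macro arguments are added in this sketch):

    #include <stdio.h>

    #define SQUASHFS_COMP_OPT       10
    #define SQUASHFS_BIT(flag, bit) (((flag) >> (bit)) & 1)
    #define SQUASHFS_COMP_OPTS(flags) SQUASHFS_BIT(flags, SQUASHFS_COMP_OPT)

    int main(void)
    {
            unsigned short flags = 0x0400;  /* only bit 10 set */

            printf("comp opts present: %d\n", SQUASHFS_COMP_OPTS(flags)); /* 1 */
            return 0;
    }
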
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 20700b9f2b4c..5c8184c061a4 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -199,10 +199,6 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
199 199
200 err = -ENOMEM; 200 err = -ENOMEM;
201 201
202 msblk->stream = squashfs_decompressor_init(msblk);
203 if (msblk->stream == NULL)
204 goto failed_mount;
205
206 msblk->block_cache = squashfs_cache_init("metadata", 202 msblk->block_cache = squashfs_cache_init("metadata",
207 SQUASHFS_CACHED_BLKS, SQUASHFS_METADATA_SIZE); 203 SQUASHFS_CACHED_BLKS, SQUASHFS_METADATA_SIZE);
208 if (msblk->block_cache == NULL) 204 if (msblk->block_cache == NULL)
@@ -215,6 +211,13 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
215 goto failed_mount; 211 goto failed_mount;
216 } 212 }
217 213
214 msblk->stream = squashfs_decompressor_init(sb, flags);
215 if (IS_ERR(msblk->stream)) {
216 err = PTR_ERR(msblk->stream);
217 msblk->stream = NULL;
218 goto failed_mount;
219 }
220
218 /* Allocate and read id index table */ 221 /* Allocate and read id index table */
219 msblk->id_table = squashfs_read_id_index_table(sb, 222 msblk->id_table = squashfs_read_id_index_table(sb,
220 le64_to_cpu(sblk->id_table_start), le16_to_cpu(sblk->no_ids)); 223 le64_to_cpu(sblk->id_table_start), le16_to_cpu(sblk->no_ids));
@@ -370,8 +373,8 @@ static void squashfs_put_super(struct super_block *sb)
370} 373}
371 374
372 375
373static struct dentry *squashfs_mount(struct file_system_type *fs_type, int flags, 376static struct dentry *squashfs_mount(struct file_system_type *fs_type,
374 const char *dev_name, void *data) 377 int flags, const char *dev_name, void *data)
375{ 378{
376 return mount_bdev(fs_type, flags, dev_name, data, squashfs_fill_super); 379 return mount_bdev(fs_type, flags, dev_name, data, squashfs_fill_super);
377} 380}
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c
index c4eb40018256..aa47a286d1f8 100644
--- a/fs/squashfs/xz_wrapper.c
+++ b/fs/squashfs/xz_wrapper.c
@@ -26,10 +26,10 @@
26#include <linux/buffer_head.h> 26#include <linux/buffer_head.h>
27#include <linux/slab.h> 27#include <linux/slab.h>
28#include <linux/xz.h> 28#include <linux/xz.h>
29#include <linux/bitops.h>
29 30
30#include "squashfs_fs.h" 31#include "squashfs_fs.h"
31#include "squashfs_fs_sb.h" 32#include "squashfs_fs_sb.h"
32#include "squashfs_fs_i.h"
33#include "squashfs.h" 33#include "squashfs.h"
34#include "decompressor.h" 34#include "decompressor.h"
35 35
@@ -38,24 +38,57 @@ struct squashfs_xz {
38 struct xz_buf buf; 38 struct xz_buf buf;
39}; 39};
40 40
41static void *squashfs_xz_init(struct squashfs_sb_info *msblk) 41struct comp_opts {
42 __le32 dictionary_size;
43 __le32 flags;
44};
45
46static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff,
47 int len)
42{ 48{
43 int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE); 49 struct comp_opts *comp_opts = buff;
50 struct squashfs_xz *stream;
51 int dict_size = msblk->block_size;
52 int err, n;
53
54 if (comp_opts) {
55 /* check compressor options are the expected length */
56 if (len < sizeof(*comp_opts)) {
57 err = -EIO;
58 goto failed;
59 }
44 60
45 struct squashfs_xz *stream = kmalloc(sizeof(*stream), GFP_KERNEL); 61 dict_size = le32_to_cpu(comp_opts->dictionary_size);
46 if (stream == NULL) 62
63 /* the dictionary size should be 2^n or 2^n+2^(n+1) */
64 n = ffs(dict_size) - 1;
65 if (dict_size != (1 << n) && dict_size != (1 << n) +
66 (1 << (n + 1))) {
67 err = -EIO;
68 goto failed;
69 }
70 }
71
72 dict_size = max_t(int, dict_size, SQUASHFS_METADATA_SIZE);
73
74 stream = kmalloc(sizeof(*stream), GFP_KERNEL);
75 if (stream == NULL) {
76 err = -ENOMEM;
47 goto failed; 77 goto failed;
78 }
48 79
49 stream->state = xz_dec_init(XZ_PREALLOC, block_size); 80 stream->state = xz_dec_init(XZ_PREALLOC, dict_size);
50 if (stream->state == NULL) 81 if (stream->state == NULL) {
82 kfree(stream);
83 err = -ENOMEM;
51 goto failed; 84 goto failed;
85 }
52 86
53 return stream; 87 return stream;
54 88
55failed: 89failed:
56 ERROR("Failed to allocate xz workspace\n"); 90 ERROR("Failed to initialise xz decompressor\n");
57 kfree(stream); 91 return ERR_PTR(err);
58 return NULL;
59} 92}
60 93
61 94
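
The dictionary-size validation in squashfs_xz_init() takes n as the index of the lowest set bit (ffs() - 1), so the accepted values are exactly 2^n and 2^n + 2^(n+1), i.e. binary 1 or 11 followed by zeros. A standalone version of the check with a few worked values:

    #include <stdio.h>
    #include <strings.h>    /* ffs() */

    static int dict_size_ok(int dict_size)
    {
            int n;

            if (dict_size <= 0)
                    return 0;
            n = ffs(dict_size) - 1;         /* index of the lowest set bit */
            return dict_size == (1 << n) ||
                   dict_size == (1 << n) + (1 << (n + 1));
    }

    int main(void)
    {
            int sizes[] = { 8192, 12288, 131072, 196608, 10240 };
            unsigned int i;

            for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
                    printf("%6d -> %s\n", sizes[i],
                           dict_size_ok(sizes[i]) ? "ok" : "rejected");
            return 0;
    }

8192, 12288 (2^12 + 2^13), 131072 and 196608 pass; 10240 (2^11 + 2^13) is rejected because its set bits are not adjacent.
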
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 4661ae2b1cec..517688b32ffa 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -26,19 +26,19 @@
26#include <linux/buffer_head.h> 26#include <linux/buffer_head.h>
27#include <linux/slab.h> 27#include <linux/slab.h>
28#include <linux/zlib.h> 28#include <linux/zlib.h>
29#include <linux/vmalloc.h>
29 30
30#include "squashfs_fs.h" 31#include "squashfs_fs.h"
31#include "squashfs_fs_sb.h" 32#include "squashfs_fs_sb.h"
32#include "squashfs.h" 33#include "squashfs.h"
33#include "decompressor.h" 34#include "decompressor.h"
34 35
35static void *zlib_init(struct squashfs_sb_info *dummy) 36static void *zlib_init(struct squashfs_sb_info *dummy, void *buff, int len)
36{ 37{
37 z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL); 38 z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL);
38 if (stream == NULL) 39 if (stream == NULL)
39 goto failed; 40 goto failed;
40 stream->workspace = kmalloc(zlib_inflate_workspacesize(), 41 stream->workspace = vmalloc(zlib_inflate_workspacesize());
41 GFP_KERNEL);
42 if (stream->workspace == NULL) 42 if (stream->workspace == NULL)
43 goto failed; 43 goto failed;
44 44
@@ -47,7 +47,7 @@ static void *zlib_init(struct squashfs_sb_info *dummy)
47failed: 47failed:
48 ERROR("Failed to allocate zlib workspace\n"); 48 ERROR("Failed to allocate zlib workspace\n");
49 kfree(stream); 49 kfree(stream);
50 return NULL; 50 return ERR_PTR(-ENOMEM);
51} 51}
52 52
53 53
@@ -56,7 +56,7 @@ static void zlib_free(void *strm)
56 z_stream *stream = strm; 56 z_stream *stream = strm;
57 57
58 if (stream) 58 if (stream)
59 kfree(stream->workspace); 59 vfree(stream->workspace);
60 kfree(stream); 60 kfree(stream);
61} 61}
62 62
diff --git a/fs/super.c b/fs/super.c
index e84864908264..8a06881b1920 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -71,6 +71,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
71#else 71#else
72 INIT_LIST_HEAD(&s->s_files); 72 INIT_LIST_HEAD(&s->s_files);
73#endif 73#endif
74 s->s_bdi = &default_backing_dev_info;
74 INIT_LIST_HEAD(&s->s_instances); 75 INIT_LIST_HEAD(&s->s_instances);
75 INIT_HLIST_BL_HEAD(&s->s_anon); 76 INIT_HLIST_BL_HEAD(&s->s_anon);
76 INIT_LIST_HEAD(&s->s_inodes); 77 INIT_LIST_HEAD(&s->s_inodes);
@@ -936,6 +937,7 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
936 sb = root->d_sb; 937 sb = root->d_sb;
937 BUG_ON(!sb); 938 BUG_ON(!sb);
938 WARN_ON(!sb->s_bdi); 939 WARN_ON(!sb->s_bdi);
940 WARN_ON(sb->s_bdi == &default_backing_dev_info);
939 sb->s_flags |= MS_BORN; 941 sb->s_flags |= MS_BORN;
940 942
941 error = security_sb_kern_mount(sb, flags, secdata); 943 error = security_sb_kern_mount(sb, flags, secdata);
diff --git a/fs/sync.c b/fs/sync.c
index ba76b9623e7e..c38ec163da6c 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -7,6 +7,7 @@
7#include <linux/fs.h> 7#include <linux/fs.h>
8#include <linux/slab.h> 8#include <linux/slab.h>
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/namei.h>
10#include <linux/sched.h> 11#include <linux/sched.h>
11#include <linux/writeback.h> 12#include <linux/writeback.h>
12#include <linux/syscalls.h> 13#include <linux/syscalls.h>
@@ -33,7 +34,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
33 * This should be safe, as we require bdi backing to actually 34 * This should be safe, as we require bdi backing to actually
34 * write out data in the first place 35 * write out data in the first place
35 */ 36 */
36 if (!sb->s_bdi || sb->s_bdi == &noop_backing_dev_info) 37 if (sb->s_bdi == &noop_backing_dev_info)
37 return 0; 38 return 0;
38 39
39 if (sb->s_qcop && sb->s_qcop->quota_sync) 40 if (sb->s_qcop && sb->s_qcop->quota_sync)
@@ -79,7 +80,7 @@ EXPORT_SYMBOL_GPL(sync_filesystem);
79 80
80static void sync_one_sb(struct super_block *sb, void *arg) 81static void sync_one_sb(struct super_block *sb, void *arg)
81{ 82{
82 if (!(sb->s_flags & MS_RDONLY) && sb->s_bdi) 83 if (!(sb->s_flags & MS_RDONLY))
83 __sync_filesystem(sb, *(int *)arg); 84 __sync_filesystem(sb, *(int *)arg);
84} 85}
85/* 86/*
@@ -128,6 +129,29 @@ void emergency_sync(void)
128 } 129 }
129} 130}
130 131
132/*
133 * sync a single super
134 */
135SYSCALL_DEFINE1(syncfs, int, fd)
136{
137 struct file *file;
138 struct super_block *sb;
139 int ret;
140 int fput_needed;
141
142 file = fget_light(fd, &fput_needed);
143 if (!file)
144 return -EBADF;
145 sb = file->f_dentry->d_sb;
146
147 down_read(&sb->s_umount);
148 ret = sync_filesystem(sb);
149 up_read(&sb->s_umount);
150
151 fput_light(file, fput_needed);
152 return ret;
153}
154
131/** 155/**
132 * vfs_fsync_range - helper to sync a range of data & metadata to disk 156 * vfs_fsync_range - helper to sync a range of data & metadata to disk
133 * @file: file to sync 157 * @file: file to sync
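
The new sys_syncfs resolves an fd to its superblock and syncs only that filesystem, rather than everything as sync(2) does. From userspace it can be reached through raw syscall(2) where the C library provides no wrapper; that SYS_syncfs is defined in your headers is an assumption of this sketch:

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    int main(void)
    {
            int fd = open(".", O_RDONLY);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            /* sync only the filesystem backing fd, not every mount */
            if (syscall(SYS_syncfs, fd) < 0) {
                    perror("syncfs");
                    close(fd);
                    return 1;
            }
            close(fd);
            return 0;
    }
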
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index 9ca66276315e..fa8d43c92bb8 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -488,7 +488,6 @@ static sector_t sysv_bmap(struct address_space *mapping, sector_t block)
488const struct address_space_operations sysv_aops = { 488const struct address_space_operations sysv_aops = {
489 .readpage = sysv_readpage, 489 .readpage = sysv_readpage,
490 .writepage = sysv_writepage, 490 .writepage = sysv_writepage,
491 .sync_page = block_sync_page,
492 .write_begin = sysv_write_begin, 491 .write_begin = sysv_write_begin,
493 .write_end = generic_write_end, 492 .write_end = generic_write_end,
494 .bmap = sysv_bmap 493 .bmap = sysv_bmap
diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig
index 1d1859dc3de5..f8b0160da2da 100644
--- a/fs/ubifs/Kconfig
+++ b/fs/ubifs/Kconfig
@@ -47,7 +47,7 @@ config UBIFS_FS_DEBUG
47 bool "Enable debugging support" 47 bool "Enable debugging support"
48 depends on UBIFS_FS 48 depends on UBIFS_FS
49 select DEBUG_FS 49 select DEBUG_FS
50 select KALLSYMS_ALL 50 select KALLSYMS
51 help 51 help
52 This option enables UBIFS debugging support. It makes sure various 52 This option enables UBIFS debugging support. It makes sure various
53 assertions, self-checks, debugging messages and test modes are compiled 53 assertions, self-checks, debugging messages and test modes are compiled
@@ -58,12 +58,3 @@ config UBIFS_FS_DEBUG
58 down UBIFS. You can then further enable / disable individual debugging 58 down UBIFS. You can then further enable / disable individual debugging
59 features using UBIFS module parameters and the corresponding sysfs 59 features using UBIFS module parameters and the corresponding sysfs
60 interfaces. 60 interfaces.
61
62config UBIFS_FS_DEBUG_CHKS
63 bool "Enable extra checks"
64 depends on UBIFS_FS_DEBUG
65 help
66 If extra checks are enabled UBIFS will check the consistency of its
67 internal data structures during operation. However, UBIFS performance
68 is dramatically slower when this option is selected especially if the
69 file system is large.
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index c8ff0d1ae5d3..8b3a7da531eb 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -147,7 +147,7 @@ static int make_free_space(struct ubifs_info *c)
147 if (liab2 < liab1) 147 if (liab2 < liab1)
148 return -EAGAIN; 148 return -EAGAIN;
149 149
150 dbg_budg("new liability %lld (not shrinked)", liab2); 150 dbg_budg("new liability %lld (not shrunk)", liab2);
151 151
152 /* Liability did not shrink again, try GC */ 152 /* Liability did not shrink again, try GC */
153 dbg_budg("Run GC"); 153 dbg_budg("Run GC");
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index b148fbc80f8d..1bd01ded7123 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -577,7 +577,7 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot)
577 size_t sz; 577 size_t sz;
578 578
579 if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX)) 579 if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX))
580 goto out; 580 return 0;
581 581
582 INIT_LIST_HEAD(&list); 582 INIT_LIST_HEAD(&list);
583 583
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 01c2b028e525..004d3745dc45 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -818,7 +818,7 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
818 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", 818 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
819 current->pid, lnum); 819 current->pid, lnum);
820 820
821 buf = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL); 821 buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
822 if (!buf) { 822 if (!buf) {
823 ubifs_err("cannot allocate memory for dumping LEB %d", lnum); 823 ubifs_err("cannot allocate memory for dumping LEB %d", lnum);
824 return; 824 return;
@@ -972,11 +972,39 @@ void dbg_dump_index(struct ubifs_info *c)
972void dbg_save_space_info(struct ubifs_info *c) 972void dbg_save_space_info(struct ubifs_info *c)
973{ 973{
974 struct ubifs_debug_info *d = c->dbg; 974 struct ubifs_debug_info *d = c->dbg;
975 975 int freeable_cnt;
976 ubifs_get_lp_stats(c, &d->saved_lst);
977 976
978 spin_lock(&c->space_lock); 977 spin_lock(&c->space_lock);
978 memcpy(&d->saved_lst, &c->lst, sizeof(struct ubifs_lp_stats));
979
980 /*
981 * We use a dirty hack here and zero out @c->freeable_cnt, because it
982 * affects the free space calculations, and UBIFS might not know about
983 * all freeable eraseblocks. Indeed, we know about freeable eraseblocks
984 * only when we read their lprops, and we do this only lazily, on
985 * demand. So at any given point in time @c->freeable_cnt might not be
986 * exactly accurate.
987 *
988 * Just one example about the issue we hit when we did not zero
989 * @c->freeable_cnt.
990 * 1. The file-system is mounted R/O, c->freeable_cnt is %0. We save the
991 * amount of free space in @d->saved_free
992 * 2. We re-mount R/W, which makes UBIFS read the "lsave"
993 * information from flash, where we cache LEBs from various
994 * categories ('ubifs_remount_fs()' -> 'ubifs_lpt_init()'
995 * -> 'lpt_init_wr()' -> 'read_lsave()' -> 'ubifs_lpt_lookup()'
996 * -> 'ubifs_get_pnode()' -> 'update_cats()'
997 * -> 'ubifs_add_to_cat()').
998 * 3. Lsave contains a freeable eraseblock, and @c->freeable_cnt
999 * becomes %1.
1000 * 4. We calculate the amount of free space when the re-mount is
1001 * finished in 'dbg_check_space_info()' and it does not match
1002 * @d->saved_free.
1003 */
1004 freeable_cnt = c->freeable_cnt;
1005 c->freeable_cnt = 0;
979 d->saved_free = ubifs_get_free_space_nolock(c); 1006 d->saved_free = ubifs_get_free_space_nolock(c);
1007 c->freeable_cnt = freeable_cnt;
980 spin_unlock(&c->space_lock); 1008 spin_unlock(&c->space_lock);
981} 1009}
982 1010
@@ -993,12 +1021,15 @@ int dbg_check_space_info(struct ubifs_info *c)
993{ 1021{
994 struct ubifs_debug_info *d = c->dbg; 1022 struct ubifs_debug_info *d = c->dbg;
995 struct ubifs_lp_stats lst; 1023 struct ubifs_lp_stats lst;
996 long long avail, free; 1024 long long free;
1025 int freeable_cnt;
997 1026
998 spin_lock(&c->space_lock); 1027 spin_lock(&c->space_lock);
999 avail = ubifs_calc_available(c, c->min_idx_lebs); 1028 freeable_cnt = c->freeable_cnt;
1029 c->freeable_cnt = 0;
1030 free = ubifs_get_free_space_nolock(c);
1031 c->freeable_cnt = freeable_cnt;
1000 spin_unlock(&c->space_lock); 1032 spin_unlock(&c->space_lock);
1001 free = ubifs_get_free_space(c);
1002 1033
1003 if (free != d->saved_free) { 1034 if (free != d->saved_free) {
1004 ubifs_err("free space changed from %lld to %lld", 1035 ubifs_err("free space changed from %lld to %lld",
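
Both hunks above now compute the free space with the same save/zero/restore dance under @c->space_lock. A minimal sketch of that shape, with names taken from the hunks (simplified, not the full functions):

        spin_lock(&c->space_lock);
        freeable_cnt = c->freeable_cnt;         /* remember the live counter */
        c->freeable_cnt = 0;                    /* result no longer depends on lazy lprops reads */
        free = ubifs_get_free_space_nolock(c);
        c->freeable_cnt = freeable_cnt;         /* restore before dropping the lock */
        spin_unlock(&c->space_lock);

Because both the "save" and the "check" side zero the counter, a freeable eraseblock that becomes known in between (e.g. via lsave) can no longer skew the comparison.
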
@@ -2806,40 +2837,38 @@ int dbg_debugfs_init_fs(struct ubifs_info *c)
2806 struct ubifs_debug_info *d = c->dbg; 2837 struct ubifs_debug_info *d = c->dbg;
2807 2838
2808 sprintf(d->dfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); 2839 sprintf(d->dfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id);
2809 d->dfs_dir = debugfs_create_dir(d->dfs_dir_name, dfs_rootdir); 2840 fname = d->dfs_dir_name;
2810 if (IS_ERR(d->dfs_dir)) { 2841 dent = debugfs_create_dir(fname, dfs_rootdir);
2811 err = PTR_ERR(d->dfs_dir); 2842 if (IS_ERR_OR_NULL(dent))
2812 ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
2813 d->dfs_dir_name, err);
2814 goto out; 2843 goto out;
2815 } 2844 d->dfs_dir = dent;
2816 2845
2817 fname = "dump_lprops"; 2846 fname = "dump_lprops";
2818 dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); 2847 dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
2819 if (IS_ERR(dent)) 2848 if (IS_ERR_OR_NULL(dent))
2820 goto out_remove; 2849 goto out_remove;
2821 d->dfs_dump_lprops = dent; 2850 d->dfs_dump_lprops = dent;
2822 2851
2823 fname = "dump_budg"; 2852 fname = "dump_budg";
2824 dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); 2853 dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
2825 if (IS_ERR(dent)) 2854 if (IS_ERR_OR_NULL(dent))
2826 goto out_remove; 2855 goto out_remove;
2827 d->dfs_dump_budg = dent; 2856 d->dfs_dump_budg = dent;
2828 2857
2829 fname = "dump_tnc"; 2858 fname = "dump_tnc";
2830 dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); 2859 dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
2831 if (IS_ERR(dent)) 2860 if (IS_ERR_OR_NULL(dent))
2832 goto out_remove; 2861 goto out_remove;
2833 d->dfs_dump_tnc = dent; 2862 d->dfs_dump_tnc = dent;
2834 2863
2835 return 0; 2864 return 0;
2836 2865
2837out_remove: 2866out_remove:
2838 err = PTR_ERR(dent);
2839 ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
2840 fname, err);
2841 debugfs_remove_recursive(d->dfs_dir); 2867 debugfs_remove_recursive(d->dfs_dir);
2842out: 2868out:
2869 err = dent ? PTR_ERR(dent) : -ENODEV;
2870 ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
2871 fname, err);
2843 return err; 2872 return err;
2844} 2873}
2845 2874
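
The reworked error path reflects how the debugfs creation helpers report failure: they return an ERR_PTR-encoded error when debugfs is not available and NULL on an ordinary creation failure, which is why both IS_ERR_OR_NULL() and the 'dent ? PTR_ERR(dent) : -ENODEV' fallback are needed. A minimal sketch of the pattern, using names from the hunk:

        dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
        if (IS_ERR_OR_NULL(dent)) {
                /* NULL: creation failed; ERR_PTR: debugfs unavailable */
                err = dent ? PTR_ERR(dent) : -ENODEV;
                goto out_remove;
        }
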
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 919f0de29d8f..e6493cac193d 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -23,6 +23,12 @@
23#ifndef __UBIFS_DEBUG_H__ 23#ifndef __UBIFS_DEBUG_H__
24#define __UBIFS_DEBUG_H__ 24#define __UBIFS_DEBUG_H__
25 25
26/* Checking helper functions */
27typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
28 struct ubifs_zbranch *zbr, void *priv);
29typedef int (*dbg_znode_callback)(struct ubifs_info *c,
30 struct ubifs_znode *znode, void *priv);
31
26#ifdef CONFIG_UBIFS_FS_DEBUG 32#ifdef CONFIG_UBIFS_FS_DEBUG
27 33
28/** 34/**
@@ -270,11 +276,6 @@ void dbg_dump_tnc(struct ubifs_info *c);
270void dbg_dump_index(struct ubifs_info *c); 276void dbg_dump_index(struct ubifs_info *c);
271void dbg_dump_lpt_lebs(const struct ubifs_info *c); 277void dbg_dump_lpt_lebs(const struct ubifs_info *c);
272 278
273/* Checking helper functions */
274typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
275 struct ubifs_zbranch *zbr, void *priv);
276typedef int (*dbg_znode_callback)(struct ubifs_info *c,
277 struct ubifs_znode *znode, void *priv);
278int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, 279int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
279 dbg_znode_callback znode_cb, void *priv); 280 dbg_znode_callback znode_cb, void *priv);
280 281
@@ -295,7 +296,6 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size);
295int dbg_check_filesystem(struct ubifs_info *c); 296int dbg_check_filesystem(struct ubifs_info *c);
296void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, 297void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
297 int add_pos); 298 int add_pos);
298int dbg_check_lprops(struct ubifs_info *c);
299int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, 299int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
300 int row, int col); 300 int row, int col);
301int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, 301int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
@@ -401,58 +401,94 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
401#define DBGKEY(key) ((char *)(key)) 401#define DBGKEY(key) ((char *)(key))
402#define DBGKEY1(key) ((char *)(key)) 402#define DBGKEY1(key) ((char *)(key))
403 403
404#define ubifs_debugging_init(c) 0 404static inline int ubifs_debugging_init(struct ubifs_info *c) { return 0; }
405#define ubifs_debugging_exit(c) ({}) 405static inline void ubifs_debugging_exit(struct ubifs_info *c) { return; }
406 406static inline const char *dbg_ntype(int type) { return ""; }
407#define dbg_ntype(type) "" 407static inline const char *dbg_cstate(int cmt_state) { return ""; }
408#define dbg_cstate(cmt_state) "" 408static inline const char *dbg_jhead(int jhead) { return ""; }
409#define dbg_jhead(jhead) "" 409static inline const char *
410#define dbg_get_key_dump(c, key) ({}) 410dbg_get_key_dump(const struct ubifs_info *c,
411#define dbg_dump_inode(c, inode) ({}) 411 const union ubifs_key *key) { return ""; }
412#define dbg_dump_node(c, node) ({}) 412static inline void dbg_dump_inode(const struct ubifs_info *c,
413#define dbg_dump_lpt_node(c, node, lnum, offs) ({}) 413 const struct inode *inode) { return; }
414#define dbg_dump_budget_req(req) ({}) 414static inline void dbg_dump_node(const struct ubifs_info *c,
415#define dbg_dump_lstats(lst) ({}) 415 const void *node) { return; }
416#define dbg_dump_budg(c) ({}) 416static inline void dbg_dump_lpt_node(const struct ubifs_info *c,
417#define dbg_dump_lprop(c, lp) ({}) 417 void *node, int lnum,
418#define dbg_dump_lprops(c) ({}) 418 int offs) { return; }
419#define dbg_dump_lpt_info(c) ({}) 419static inline void
420#define dbg_dump_leb(c, lnum) ({}) 420dbg_dump_budget_req(const struct ubifs_budget_req *req) { return; }
421#define dbg_dump_znode(c, znode) ({}) 421static inline void
422#define dbg_dump_heap(c, heap, cat) ({}) 422dbg_dump_lstats(const struct ubifs_lp_stats *lst) { return; }
423#define dbg_dump_pnode(c, pnode, parent, iip) ({}) 423static inline void dbg_dump_budg(struct ubifs_info *c) { return; }
424#define dbg_dump_tnc(c) ({}) 424static inline void dbg_dump_lprop(const struct ubifs_info *c,
425#define dbg_dump_index(c) ({}) 425 const struct ubifs_lprops *lp) { return; }
426#define dbg_dump_lpt_lebs(c) ({}) 426static inline void dbg_dump_lprops(struct ubifs_info *c) { return; }
427 427static inline void dbg_dump_lpt_info(struct ubifs_info *c) { return; }
428#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 428static inline void dbg_dump_leb(const struct ubifs_info *c,
429#define dbg_old_index_check_init(c, zroot) 0 429 int lnum) { return; }
430#define dbg_save_space_info(c) ({}) 430static inline void
431#define dbg_check_space_info(c) 0 431dbg_dump_znode(const struct ubifs_info *c,
432#define dbg_check_old_index(c, zroot) 0 432 const struct ubifs_znode *znode) { return; }
433#define dbg_check_cats(c) 0 433static inline void dbg_dump_heap(struct ubifs_info *c,
434#define dbg_check_ltab(c) 0 434 struct ubifs_lpt_heap *heap,
435#define dbg_chk_lpt_free_spc(c) 0 435 int cat) { return; }
436#define dbg_chk_lpt_sz(c, action, len) 0 436static inline void dbg_dump_pnode(struct ubifs_info *c,
437#define dbg_check_synced_i_size(inode) 0 437 struct ubifs_pnode *pnode,
438#define dbg_check_dir_size(c, dir) 0 438 struct ubifs_nnode *parent,
439#define dbg_check_tnc(c, x) 0 439 int iip) { return; }
440#define dbg_check_idx_size(c, idx_size) 0 440static inline void dbg_dump_tnc(struct ubifs_info *c) { return; }
441#define dbg_check_filesystem(c) 0 441static inline void dbg_dump_index(struct ubifs_info *c) { return; }
442#define dbg_check_heap(c, heap, cat, add_pos) ({}) 442static inline void dbg_dump_lpt_lebs(const struct ubifs_info *c) { return; }
443#define dbg_check_lprops(c) 0 443
444#define dbg_check_lpt_nodes(c, cnode, row, col) 0 444static inline int dbg_walk_index(struct ubifs_info *c,
445#define dbg_check_inode_size(c, inode, size) 0 445 dbg_leaf_callback leaf_cb,
446#define dbg_check_data_nodes_order(c, head) 0 446 dbg_znode_callback znode_cb,
447#define dbg_check_nondata_nodes_order(c, head) 0 447 void *priv) { return 0; }
448#define dbg_force_in_the_gaps_enabled 0 448static inline void dbg_save_space_info(struct ubifs_info *c) { return; }
449#define dbg_force_in_the_gaps() 0 449static inline int dbg_check_space_info(struct ubifs_info *c) { return 0; }
450#define dbg_failure_mode 0 450static inline int dbg_check_lprops(struct ubifs_info *c) { return 0; }
451 451static inline int
452#define dbg_debugfs_init() 0 452dbg_old_index_check_init(struct ubifs_info *c,
453#define dbg_debugfs_exit() 453 struct ubifs_zbranch *zroot) { return 0; }
454#define dbg_debugfs_init_fs(c) 0 454static inline int
455#define dbg_debugfs_exit_fs(c) 0 455dbg_check_old_index(struct ubifs_info *c,
456 struct ubifs_zbranch *zroot) { return 0; }
457static inline int dbg_check_cats(struct ubifs_info *c) { return 0; }
458static inline int dbg_check_ltab(struct ubifs_info *c) { return 0; }
459static inline int dbg_chk_lpt_free_spc(struct ubifs_info *c) { return 0; }
460static inline int dbg_chk_lpt_sz(struct ubifs_info *c,
461 int action, int len) { return 0; }
462static inline int dbg_check_synced_i_size(struct inode *inode) { return 0; }
463static inline int dbg_check_dir_size(struct ubifs_info *c,
464 const struct inode *dir) { return 0; }
465static inline int dbg_check_tnc(struct ubifs_info *c, int extra) { return 0; }
466static inline int dbg_check_idx_size(struct ubifs_info *c,
467 long long idx_size) { return 0; }
468static inline int dbg_check_filesystem(struct ubifs_info *c) { return 0; }
469static inline void dbg_check_heap(struct ubifs_info *c,
470 struct ubifs_lpt_heap *heap,
471 int cat, int add_pos) { return; }
472static inline int dbg_check_lpt_nodes(struct ubifs_info *c,
473 struct ubifs_cnode *cnode, int row, int col) { return 0; }
474static inline int dbg_check_inode_size(struct ubifs_info *c,
475 const struct inode *inode,
476 loff_t size) { return 0; }
477static inline int
478dbg_check_data_nodes_order(struct ubifs_info *c,
479 struct list_head *head) { return 0; }
480static inline int
481dbg_check_nondata_nodes_order(struct ubifs_info *c,
482 struct list_head *head) { return 0; }
483
484static inline int dbg_force_in_the_gaps(void) { return 0; }
485#define dbg_force_in_the_gaps_enabled 0
486#define dbg_failure_mode 0
487
488static inline int dbg_debugfs_init(void) { return 0; }
489static inline void dbg_debugfs_exit(void) { return; }
490static inline int dbg_debugfs_init_fs(struct ubifs_info *c) { return 0; }
491static inline int dbg_debugfs_exit_fs(struct ubifs_info *c) { return 0; }
456 492
457#endif /* !CONFIG_UBIFS_FS_DEBUG */ 493#endif /* !CONFIG_UBIFS_FS_DEBUG */
458#endif /* !__UBIFS_DEBUG_H__ */ 494#endif /* !__UBIFS_DEBUG_H__ */
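
Converting the !CONFIG_UBIFS_FS_DEBUG stubs from macros to static inline functions (and moving the callback typedefs above the #ifdef so the stubs can name them) buys compile-time type checking in non-debug builds at no runtime cost. A sketch of the difference, using one stub from the hunk:

        /* macro stub: arguments are neither evaluated nor type-checked,
         * so a broken call site compiles until debugging is enabled */
        #define dbg_check_tnc(c, x) 0

        /* inline stub: same generated code, but every call is checked */
        static inline int dbg_check_tnc(struct ubifs_info *c, int extra) { return 0; }
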
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index d77db7e36484..b286db79c686 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -448,10 +448,12 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
448 if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) { 448 if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) {
449 /* 449 /*
450 * We change whole page so no need to load it. But we 450 * We change whole page so no need to load it. But we
451 * have to set the @PG_checked flag to make the further 451 * do not know whether this page exists on the media or
452 * code know that the page is new. This might be not 452 * not, so we assume the latter because it requires
 453 * true, but it is better to budget more than to read 453 * a larger budget. The assumption is that it is better
454 * the page from the media. 454 * to budget a bit more than to read the page from the
455 * media. Thus, we are setting the @PG_checked flag
456 * here.
455 */ 457 */
456 SetPageChecked(page); 458 SetPageChecked(page);
457 skipped_read = 1; 459 skipped_read = 1;
@@ -559,6 +561,7 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping,
559 dbg_gen("copied %d instead of %d, read page and repeat", 561 dbg_gen("copied %d instead of %d, read page and repeat",
560 copied, len); 562 copied, len);
561 cancel_budget(c, page, ui, appending); 563 cancel_budget(c, page, ui, appending);
564 ClearPageChecked(page);
562 565
563 /* 566 /*
564 * Return 0 to force VFS to repeat the whole operation, or the 567 * Return 0 to force VFS to repeat the whole operation, or the
@@ -1309,6 +1312,9 @@ int ubifs_fsync(struct file *file, int datasync)
1309 1312
1310 dbg_gen("syncing inode %lu", inode->i_ino); 1313 dbg_gen("syncing inode %lu", inode->i_ino);
1311 1314
1315 if (inode->i_sb->s_flags & MS_RDONLY)
1316 return 0;
1317
1312 /* 1318 /*
1313 * VFS has already synchronized dirty pages for this inode. Synchronize 1319 * VFS has already synchronized dirty pages for this inode. Synchronize
1314 * the inode unless this is a 'datasync()' call. 1320 * the inode unless this is a 'datasync()' call.
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 8aacd64957a2..548acf494afd 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -160,7 +160,7 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
160 if (IS_RDONLY(inode)) 160 if (IS_RDONLY(inode))
161 return -EROFS; 161 return -EROFS;
162 162
163 if (!is_owner_or_cap(inode)) 163 if (!inode_owner_or_capable(inode))
164 return -EACCES; 164 return -EACCES;
165 165
166 if (get_user(flags, (int __user *) arg)) 166 if (get_user(flags, (int __user *) arg))
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index c7b25e2f7764..0ee0847f2421 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -1094,7 +1094,7 @@ static int scan_check_cb(struct ubifs_info *c,
1094 } 1094 }
1095 } 1095 }
1096 1096
1097 buf = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL); 1097 buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
1098 if (!buf) { 1098 if (!buf) {
1099 ubifs_err("cannot allocate memory to scan LEB %d", lnum); 1099 ubifs_err("cannot allocate memory to scan LEB %d", lnum);
1100 goto out; 1100 goto out;
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index 72775d35b99e..ef5155e109a2 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -1270,10 +1270,9 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
1270 lnum = branch->lnum; 1270 lnum = branch->lnum;
1271 offs = branch->offs; 1271 offs = branch->offs;
1272 pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_NOFS); 1272 pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_NOFS);
1273 if (!pnode) { 1273 if (!pnode)
1274 err = -ENOMEM; 1274 return -ENOMEM;
1275 goto out; 1275
1276 }
1277 if (lnum == 0) { 1276 if (lnum == 0) {
1278 /* 1277 /*
1279 * This pnode was not written which just means that the LEB 1278 * This pnode was not written which just means that the LEB
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 0a3c2c3f5c4a..0c9c69bd983a 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -1633,7 +1633,7 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
1633 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) 1633 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
1634 return 0; 1634 return 0;
1635 1635
1636 buf = p = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL); 1636 buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
1637 if (!buf) { 1637 if (!buf) {
1638 ubifs_err("cannot allocate memory for ltab checking"); 1638 ubifs_err("cannot allocate memory for ltab checking");
1639 return 0; 1639 return 0;
@@ -1885,7 +1885,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
1885 1885
1886 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", 1886 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
1887 current->pid, lnum); 1887 current->pid, lnum);
1888 buf = p = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL); 1888 buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
1889 if (!buf) { 1889 if (!buf) {
1890 ubifs_err("cannot allocate memory to dump LPT"); 1890 ubifs_err("cannot allocate memory to dump LPT");
1891 return; 1891 return;
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 2cdbd31641d7..09df318e368f 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -898,7 +898,7 @@ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci)
898 if (c->no_orphs) 898 if (c->no_orphs)
899 return 0; 899 return 0;
900 900
901 buf = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL); 901 buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
902 if (!buf) { 902 if (!buf) {
903 ubifs_err("cannot allocate memory to check orphans"); 903 ubifs_err("cannot allocate memory to check orphans");
904 return 0; 904 return 0;
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 936f2cbfe6b6..3dbad6fbd1eb 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -317,6 +317,32 @@ int ubifs_recover_master_node(struct ubifs_info *c)
317 goto out_free; 317 goto out_free;
318 } 318 }
319 memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ); 319 memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ);
320
321 /*
322 * We had to recover the master node, which means there was an
323 * unclean reboot. However, it is possible that the master node
324 * is clean at this point, i.e., %UBIFS_MST_DIRTY is not set.
325 * E.g., consider the following chain of events:
326 *
327 * 1. UBIFS was cleanly unmounted, so the master node is clean
328 * 2. UBIFS is being mounted R/W and starts changing the master
 329 * node in the first LEB (%UBIFS_MST_LNUM). A power cut happens,
330 * so this LEB ends up with some amount of garbage at the
331 * end.
332 * 3. UBIFS is being mounted R/O. We reach this place and
333 * recover the master node from the second LEB
334 * (%UBIFS_MST_LNUM + 1). But we cannot update the media
335 * because we are being mounted R/O. We have to defer the
336 * operation.
 337 * 4. However, this master node (@c->mst_node) is marked as
 338 * clean (since step 1). And if we just return, the mount
 339 * code will be confused and won't recover the master node
 340 * when it is re-mounted R/W later.
 341 *
 342 * Thus, we force the recovery by marking the master node as
 343 * dirty.
344 */
345 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
320 } else { 346 } else {
321 /* Write the recovered master node */ 347 /* Write the recovered master node */
322 c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1; 348 c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1;
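
The added line stores the flag in on-media byte order, since @c->mst_node mirrors the on-flash master node. As an illustration of the same little-endian idiom, a hypothetical matching test (mst_is_dirty is not part of the patch; the __le32 flags field and UBIFS_MST_DIRTY are):

        static inline int mst_is_dirty(const struct ubifs_mst_node *m)
        {
                /* convert from on-media LE32 before testing the bit */
                return !!(le32_to_cpu(m->flags) & UBIFS_MST_DIRTY);
        }
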
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index e5dc1e120e8d..be6c7b008f38 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1568,6 +1568,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1568 mutex_lock(&c->umount_mutex); 1568 mutex_lock(&c->umount_mutex);
1569 dbg_save_space_info(c); 1569 dbg_save_space_info(c);
1570 c->remounting_rw = 1; 1570 c->remounting_rw = 1;
1571 c->ro_mount = 0;
1571 1572
1572 err = check_free_space(c); 1573 err = check_free_space(c);
1573 if (err) 1574 if (err)
@@ -1670,19 +1671,30 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1670 if (err) 1671 if (err)
1671 goto out; 1672 goto out;
1672 1673
1674 dbg_gen("re-mounted read-write");
1675 c->remounting_rw = 0;
1676
1673 if (c->need_recovery) { 1677 if (c->need_recovery) {
1674 c->need_recovery = 0; 1678 c->need_recovery = 0;
1675 ubifs_msg("deferred recovery completed"); 1679 ubifs_msg("deferred recovery completed");
1680 } else {
1681 /*
 1682 * Do not run the debugging space check if we were doing
 1683 * recovery, because when we saved the information we had the
 1684 * file-system in a state where the TNC and lprops had been
1685 * modified in memory, but all the I/O operations (including a
1686 * commit) were deferred. So the file-system was in
1687 * "non-committed" state. Now the file-system is in committed
1688 * state, and of course the amount of free space will change
1689 * because, for example, the old index size was imprecise.
1690 */
1691 err = dbg_check_space_info(c);
1676 } 1692 }
1677
1678 dbg_gen("re-mounted read-write");
1679 c->ro_mount = 0;
1680 c->remounting_rw = 0;
1681 err = dbg_check_space_info(c);
1682 mutex_unlock(&c->umount_mutex); 1693 mutex_unlock(&c->umount_mutex);
1683 return err; 1694 return err;
1684 1695
1685out: 1696out:
1697 c->ro_mount = 1;
1686 vfree(c->orph_buf); 1698 vfree(c->orph_buf);
1687 c->orph_buf = NULL; 1699 c->orph_buf = NULL;
1688 if (c->bgt) { 1700 if (c->bgt) {
@@ -1760,10 +1772,12 @@ static void ubifs_put_super(struct super_block *sb)
1760 * of the media. For example, there will be dirty inodes if we failed 1772 * of the media. For example, there will be dirty inodes if we failed
1761 * to write them back because of I/O errors. 1773 * to write them back because of I/O errors.
1762 */ 1774 */
1763 ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0); 1775 if (!c->ro_error) {
1764 ubifs_assert(c->budg_idx_growth == 0); 1776 ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0);
1765 ubifs_assert(c->budg_dd_growth == 0); 1777 ubifs_assert(c->budg_idx_growth == 0);
1766 ubifs_assert(c->budg_data_growth == 0); 1778 ubifs_assert(c->budg_dd_growth == 0);
1779 ubifs_assert(c->budg_data_growth == 0);
1780 }
1767 1781
1768 /* 1782 /*
1769 * The 'c->umount_lock' prevents races between UBIFS memory shrinker 1783 * The 'c->umount_lock' prevents races between UBIFS memory shrinker
@@ -2011,7 +2025,6 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
2011 */ 2025 */
2012 c->bdi.name = "ubifs", 2026 c->bdi.name = "ubifs",
2013 c->bdi.capabilities = BDI_CAP_MAP_COPY; 2027 c->bdi.capabilities = BDI_CAP_MAP_COPY;
2014 c->bdi.unplug_io_fn = default_unplug_io_fn;
2015 err = bdi_init(&c->bdi); 2028 err = bdi_init(&c->bdi);
2016 if (err) 2029 if (err)
2017 goto out_close; 2030 goto out_close;
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index c74400f88fe0..3299f469e712 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -56,6 +56,7 @@
56 */ 56 */
57 57
58#include "ubifs.h" 58#include "ubifs.h"
59#include <linux/fs.h>
59#include <linux/slab.h> 60#include <linux/slab.h>
60#include <linux/xattr.h> 61#include <linux/xattr.h>
61#include <linux/posix_acl_xattr.h> 62#include <linux/posix_acl_xattr.h>
@@ -80,7 +81,6 @@ enum {
80}; 81};
81 82
82static const struct inode_operations none_inode_operations; 83static const struct inode_operations none_inode_operations;
83static const struct address_space_operations none_address_operations;
84static const struct file_operations none_file_operations; 84static const struct file_operations none_file_operations;
85 85
86/** 86/**
@@ -130,7 +130,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
130 } 130 }
131 131
132 /* Re-define all operations to be "nothing" */ 132 /* Re-define all operations to be "nothing" */
133 inode->i_mapping->a_ops = &none_address_operations; 133 inode->i_mapping->a_ops = &empty_aops;
134 inode->i_op = &none_inode_operations; 134 inode->i_op = &none_inode_operations;
135 inode->i_fop = &none_file_operations; 135 inode->i_fop = &none_file_operations;
136 136
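
The dropped none_address_operations was an all-NULL table; empty_aops is the equivalent zero-filled struct address_space_operations that the VFS exports around this kernel release, so each filesystem no longer needs a private copy. The substitution side by side (a sketch, assuming empty_aops is visible via the newly added <linux/fs.h> include):

        /* before: per-filesystem zero-initialized table */
        static const struct address_space_operations none_address_operations;
        inode->i_mapping->a_ops = &none_address_operations;

        /* after: the shared VFS-provided instance */
        inode->i_mapping->a_ops = &empty_aops;
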
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 8994dd041660..95518a9f589e 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -27,11 +27,10 @@
27#include "udf_i.h" 27#include "udf_i.h"
28#include "udf_sb.h" 28#include "udf_sb.h"
29 29
30#define udf_clear_bit(nr, addr) ext2_clear_bit(nr, addr) 30#define udf_clear_bit __test_and_clear_bit_le
31#define udf_set_bit(nr, addr) ext2_set_bit(nr, addr) 31#define udf_set_bit __test_and_set_bit_le
32#define udf_test_bit(nr, addr) ext2_test_bit(nr, addr) 32#define udf_test_bit test_bit_le
33#define udf_find_next_one_bit(addr, size, offset) \ 33#define udf_find_next_one_bit find_next_bit_le
34 ext2_find_next_bit((unsigned long *)(addr), size, offset)
35 34
36static int read_block_bitmap(struct super_block *sb, 35static int read_block_bitmap(struct super_block *sb,
37 struct udf_bitmap *bitmap, unsigned int block, 36 struct udf_bitmap *bitmap, unsigned int block,
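
The old ext2_* names map onto the generic little-endian bitops; note that the set/clear macros deliberately pick the __test_and_* variants because UDF's callers use the returned old value. A small hedged sketch of the renamed API (kernel context assumed; the demo function is illustrative only):

        static void le_bitops_demo(void)
        {
                unsigned long map[2] = { 0, 0 };        /* 128-bit LE bitmap */
                int old;

                old = __test_and_set_bit_le(5, map);    /* old == 0, bit now set */
                if (test_bit_le(5, map))
                        old = __test_and_clear_bit_le(5, map);  /* old == 1 */
                /* find_next_bit_le(map, 128, 0) now returns 128: none set */
        }
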
diff --git a/fs/udf/file.c b/fs/udf/file.c
index f391a2adc699..2a346bb1d9f5 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -98,7 +98,6 @@ static int udf_adinicb_write_end(struct file *file,
98const struct address_space_operations udf_adinicb_aops = { 98const struct address_space_operations udf_adinicb_aops = {
99 .readpage = udf_adinicb_readpage, 99 .readpage = udf_adinicb_readpage,
100 .writepage = udf_adinicb_writepage, 100 .writepage = udf_adinicb_writepage,
101 .sync_page = block_sync_page,
102 .write_begin = simple_write_begin, 101 .write_begin = simple_write_begin,
103 .write_end = udf_adinicb_write_end, 102 .write_end = udf_adinicb_write_end,
104}; 103};
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index ccc814321414..1d1358ed80c1 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -140,7 +140,6 @@ static sector_t udf_bmap(struct address_space *mapping, sector_t block)
140const struct address_space_operations udf_aops = { 140const struct address_space_operations udf_aops = {
141 .readpage = udf_readpage, 141 .readpage = udf_readpage,
142 .writepage = udf_writepage, 142 .writepage = udf_writepage,
143 .sync_page = block_sync_page,
144 .write_begin = udf_write_begin, 143 .write_begin = udf_write_begin,
145 .write_end = generic_write_end, 144 .write_end = generic_write_end,
146 .bmap = udf_bmap, 145 .bmap = udf_bmap,
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 6863599f7033..b4d791a83207 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -78,7 +78,7 @@ static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t off
78 78
79/* 79/*
80 * Returns the location of the fragment from 80 * Returns the location of the fragment from
81 * the begining of the filesystem. 81 * the beginning of the filesystem.
82 */ 82 */
83 83
84static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock) 84static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock)
@@ -552,7 +552,6 @@ static sector_t ufs_bmap(struct address_space *mapping, sector_t block)
552const struct address_space_operations ufs_aops = { 552const struct address_space_operations ufs_aops = {
553 .readpage = ufs_readpage, 553 .readpage = ufs_readpage,
554 .writepage = ufs_writepage, 554 .writepage = ufs_writepage,
555 .sync_page = block_sync_page,
556 .write_begin = ufs_write_begin, 555 .write_begin = ufs_write_begin,
557 .write_end = generic_write_end, 556 .write_end = generic_write_end,
558 .bmap = ufs_bmap 557 .bmap = ufs_bmap
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 7693d6293404..3915ade6f9a8 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -483,9 +483,9 @@ static int ufs_parse_options (char * options, unsigned * mount_options)
483} 483}
484 484
485/* 485/*
486 * Diffrent types of UFS hold fs_cstotal in different 486 * Different types of UFS hold fs_cstotal in different
487 * places, and use diffrent data structure for it. 487 * places, and use different data structure for it.
488 * To make things simplier we just copy fs_cstotal to ufs_sb_private_info 488 * To make things simpler we just copy fs_cstotal to ufs_sb_private_info
489 */ 489 */
490static void ufs_setup_cstotal(struct super_block *sb) 490static void ufs_setup_cstotal(struct super_block *sb)
491{ 491{
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index e56a4f567212..5f821dbc0579 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -479,7 +479,6 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size)
479 break; 479 break;
480 if (IS_SYNC(inode) && (inode->i_state & I_DIRTY)) 480 if (IS_SYNC(inode) && (inode->i_state & I_DIRTY))
481 ufs_sync_inode (inode); 481 ufs_sync_inode (inode);
482 blk_run_address_space(inode->i_mapping);
483 yield(); 482 yield();
484 } 483 }
485 484
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 9f8775ce381c..954175928240 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -408,7 +408,7 @@ static inline unsigned _ubh_find_next_zero_bit_(
408 for (;;) { 408 for (;;) {
409 count = min_t(unsigned int, size + offset, uspi->s_bpf); 409 count = min_t(unsigned int, size + offset, uspi->s_bpf);
410 size -= count - offset; 410 size -= count - offset;
411 pos = ext2_find_next_zero_bit (ubh->bh[base]->b_data, count, offset); 411 pos = find_next_zero_bit_le(ubh->bh[base]->b_data, count, offset);
412 if (pos < count || !size) 412 if (pos < count || !size)
413 break; 413 break;
414 base++; 414 base++;
diff --git a/fs/utimes.c b/fs/utimes.c
index 179b58690657..ba653f3dc1bc 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -95,7 +95,7 @@ static int utimes_common(struct path *path, struct timespec *times)
95 if (IS_IMMUTABLE(inode)) 95 if (IS_IMMUTABLE(inode))
96 goto mnt_drop_write_and_out; 96 goto mnt_drop_write_and_out;
97 97
98 if (!is_owner_or_cap(inode)) { 98 if (!inode_owner_or_capable(inode)) {
99 error = inode_permission(inode, MAY_WRITE); 99 error = inode_permission(inode, MAY_WRITE);
100 if (error) 100 if (error)
101 goto mnt_drop_write_and_out; 101 goto mnt_drop_write_and_out;
diff --git a/fs/xattr.c b/fs/xattr.c
index 01bb8135e14a..f1ef94974dea 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -59,7 +59,7 @@ xattr_permission(struct inode *inode, const char *name, int mask)
59 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) 59 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
60 return -EPERM; 60 return -EPERM;
61 if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) && 61 if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
62 (mask & MAY_WRITE) && !is_owner_or_cap(inode)) 62 (mask & MAY_WRITE) && !inode_owner_or_capable(inode))
63 return -EPERM; 63 return -EPERM;
64 } 64 }
65 65
@@ -666,7 +666,7 @@ generic_setxattr(struct dentry *dentry, const char *name, const void *value, siz
666 handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name); 666 handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
667 if (!handler) 667 if (!handler)
668 return -EOPNOTSUPP; 668 return -EOPNOTSUPP;
669 return handler->set(dentry, name, value, size, 0, handler->flags); 669 return handler->set(dentry, name, value, size, flags, handler->flags);
670} 670}
671 671
672/* 672/*
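
The generic_setxattr() change is a real bug fix: the handler was invoked with a hard-coded 0, so the XATTR_CREATE/XATTR_REPLACE semantics requested by userspace were silently dropped for any filesystem routed through this path. A hedged userspace illustration of what the flags are supposed to guarantee (assumes <sys/xattr.h>; the attribute name is made up):

        #include <sys/xattr.h>
        #include <errno.h>

        static int create_only_comment(const char *path)
        {
                /* XATTR_CREATE must fail with EEXIST if the attribute
                 * already exists; with the flags dropped in the kernel
                 * it would silently overwrite instead. */
                if (setxattr(path, "user.comment", "v1", 2, XATTR_CREATE) == -1)
                        return -errno;
                return 0;
        }
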
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index faca44997099..284a7c89697e 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -16,14 +16,11 @@
16# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17# 17#
18 18
19EXTRA_CFLAGS += -I$(src) -I$(src)/linux-2.6 19ccflags-y := -I$(src) -I$(src)/linux-2.6
20ccflags-$(CONFIG_XFS_DEBUG) += -g
20 21
21XFS_LINUX := linux-2.6 22XFS_LINUX := linux-2.6
22 23
23ifeq ($(CONFIG_XFS_DEBUG),y)
24 EXTRA_CFLAGS += -g
25endif
26
27obj-$(CONFIG_XFS_FS) += xfs.o 24obj-$(CONFIG_XFS_FS) += xfs.o
28 25
29xfs-y += linux-2.6/xfs_trace.o 26xfs-y += linux-2.6/xfs_trace.o
@@ -105,11 +102,10 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \
105 xfs_globals.o \ 102 xfs_globals.o \
106 xfs_ioctl.o \ 103 xfs_ioctl.o \
107 xfs_iops.o \ 104 xfs_iops.o \
105 xfs_message.o \
108 xfs_super.o \ 106 xfs_super.o \
109 xfs_sync.o \ 107 xfs_sync.o \
110 xfs_xattr.o) 108 xfs_xattr.o)
111 109
112# Objects in support/ 110# Objects in support/
113xfs-y += $(addprefix support/, \ 111xfs-y += support/uuid.o
114 debug.o \
115 uuid.o)
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index 666c9db48eb6..a907de565db3 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -23,6 +23,7 @@
23#include <linux/backing-dev.h> 23#include <linux/backing-dev.h>
24#include "time.h" 24#include "time.h"
25#include "kmem.h" 25#include "kmem.h"
26#include "xfs_message.h"
26 27
27/* 28/*
28 * Greedy allocation. May fail and may return vmalloced memory. 29 * Greedy allocation. May fail and may return vmalloced memory.
@@ -56,8 +57,8 @@ kmem_alloc(size_t size, unsigned int __nocast flags)
56 if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) 57 if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
57 return ptr; 58 return ptr;
58 if (!(++retries % 100)) 59 if (!(++retries % 100))
59 printk(KERN_ERR "XFS: possible memory allocation " 60 xfs_err(NULL,
60 "deadlock in %s (mode:0x%x)\n", 61 "possible memory allocation deadlock in %s (mode:0x%x)",
61 __func__, lflags); 62 __func__, lflags);
62 congestion_wait(BLK_RW_ASYNC, HZ/50); 63 congestion_wait(BLK_RW_ASYNC, HZ/50);
63 } while (1); 64 } while (1);
@@ -112,8 +113,8 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
112 if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) 113 if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
113 return ptr; 114 return ptr;
114 if (!(++retries % 100)) 115 if (!(++retries % 100))
115 printk(KERN_ERR "XFS: possible memory allocation " 116 xfs_err(NULL,
116 "deadlock in %s (mode:0x%x)\n", 117 "possible memory allocation deadlock in %s (mode:0x%x)",
117 __func__, lflags); 118 __func__, lflags);
118 congestion_wait(BLK_RW_ASYNC, HZ/50); 119 congestion_wait(BLK_RW_ASYNC, HZ/50);
119 } while (1); 120 } while (1);
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index ec7bbb5645b6..79ce38be15a1 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -413,8 +413,7 @@ xfs_submit_ioend_bio(
413 if (xfs_ioend_new_eof(ioend)) 413 if (xfs_ioend_new_eof(ioend))
414 xfs_mark_inode_dirty(XFS_I(ioend->io_inode)); 414 xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
415 415
416 submit_bio(wbc->sync_mode == WB_SYNC_ALL ? 416 submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
417 WRITE_SYNC_PLUG : WRITE, bio);
418} 417}
419 418
420STATIC struct bio * 419STATIC struct bio *
@@ -854,7 +853,7 @@ xfs_aops_discard_page(
854 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 853 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
855 goto out_invalidate; 854 goto out_invalidate;
856 855
857 xfs_fs_cmn_err(CE_ALERT, ip->i_mount, 856 xfs_alert(ip->i_mount,
858 "page discard on page %p, inode 0x%llx, offset %llu.", 857 "page discard on page %p, inode 0x%llx, offset %llu.",
859 page, ip->i_ino, offset); 858 page, ip->i_ino, offset);
860 859
@@ -872,7 +871,7 @@ xfs_aops_discard_page(
872 if (error) { 871 if (error) {
873 /* something screwed, just bail */ 872 /* something screwed, just bail */
874 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 873 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
875 xfs_fs_cmn_err(CE_ALERT, ip->i_mount, 874 xfs_alert(ip->i_mount,
876 "page discard unable to remove delalloc mapping."); 875 "page discard unable to remove delalloc mapping.");
877 } 876 }
878 break; 877 break;
@@ -1296,7 +1295,7 @@ xfs_get_blocks_direct(
1296 * If the private argument is non-NULL __xfs_get_blocks signals us that we 1295 * If the private argument is non-NULL __xfs_get_blocks signals us that we
1297 * need to issue a transaction to convert the range from unwritten to written 1296 * need to issue a transaction to convert the range from unwritten to written
1298 * extents. In case this is regular synchronous I/O we just call xfs_end_io 1297 * extents. In case this is regular synchronous I/O we just call xfs_end_io
1299 * to do this and we are done. But in case this was a successfull AIO 1298 * to do this and we are done. But in case this was a successful AIO
1300 * request this handler is called from interrupt context, from which we 1299 * request this handler is called from interrupt context, from which we
1301 * can't start transactions. In that case offload the I/O completion to 1300 * can't start transactions. In that case offload the I/O completion to
1302 * the workqueues we also use for buffered I/O completion. 1301 * the workqueues we also use for buffered I/O completion.
@@ -1411,7 +1410,7 @@ xfs_vm_write_failed(
1411 if (error) { 1410 if (error) {
1412 /* something screwed, just bail */ 1411 /* something screwed, just bail */
1413 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 1412 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
1414 xfs_fs_cmn_err(CE_ALERT, ip->i_mount, 1413 xfs_alert(ip->i_mount,
1415 "xfs_vm_write_failed: unable to clean up ino %lld", 1414 "xfs_vm_write_failed: unable to clean up ino %lld",
1416 ip->i_ino); 1415 ip->i_ino);
1417 } 1416 }
@@ -1495,7 +1494,6 @@ const struct address_space_operations xfs_address_space_operations = {
1495 .readpages = xfs_vm_readpages, 1494 .readpages = xfs_vm_readpages,
1496 .writepage = xfs_vm_writepage, 1495 .writepage = xfs_vm_writepage,
1497 .writepages = xfs_vm_writepages, 1496 .writepages = xfs_vm_writepages,
1498 .sync_page = block_sync_page,
1499 .releasepage = xfs_vm_releasepage, 1497 .releasepage = xfs_vm_releasepage,
1500 .invalidatepage = xfs_vm_invalidatepage, 1498 .invalidatepage = xfs_vm_invalidatepage,
1501 .write_begin = xfs_vm_write_begin, 1499 .write_begin = xfs_vm_write_begin,
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index f83a4c830a65..9ef9ed2cfe2e 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -94,75 +94,6 @@ xfs_buf_vmap_len(
94} 94}
95 95
96/* 96/*
97 * Page Region interfaces.
98 *
99 * For pages in filesystems where the blocksize is smaller than the
100 * pagesize, we use the page->private field (long) to hold a bitmap
101 * of uptodate regions within the page.
102 *
103 * Each such region is "bytes per page / bits per long" bytes long.
104 *
105 * NBPPR == number-of-bytes-per-page-region
106 * BTOPR == bytes-to-page-region (rounded up)
107 * BTOPRT == bytes-to-page-region-truncated (rounded down)
108 */
109#if (BITS_PER_LONG == 32)
110#define PRSHIFT (PAGE_CACHE_SHIFT - 5) /* (32 == 1<<5) */
111#elif (BITS_PER_LONG == 64)
112#define PRSHIFT (PAGE_CACHE_SHIFT - 6) /* (64 == 1<<6) */
113#else
114#error BITS_PER_LONG must be 32 or 64
115#endif
116#define NBPPR (PAGE_CACHE_SIZE/BITS_PER_LONG)
117#define BTOPR(b) (((unsigned int)(b) + (NBPPR - 1)) >> PRSHIFT)
118#define BTOPRT(b) (((unsigned int)(b) >> PRSHIFT))
119
120STATIC unsigned long
121page_region_mask(
122 size_t offset,
123 size_t length)
124{
125 unsigned long mask;
126 int first, final;
127
128 first = BTOPR(offset);
129 final = BTOPRT(offset + length - 1);
130 first = min(first, final);
131
132 mask = ~0UL;
133 mask <<= BITS_PER_LONG - (final - first);
134 mask >>= BITS_PER_LONG - (final);
135
136 ASSERT(offset + length <= PAGE_CACHE_SIZE);
137 ASSERT((final - first) < BITS_PER_LONG && (final - first) >= 0);
138
139 return mask;
140}
141
142STATIC void
143set_page_region(
144 struct page *page,
145 size_t offset,
146 size_t length)
147{
148 set_page_private(page,
149 page_private(page) | page_region_mask(offset, length));
150 if (page_private(page) == ~0UL)
151 SetPageUptodate(page);
152}
153
154STATIC int
155test_page_region(
156 struct page *page,
157 size_t offset,
158 size_t length)
159{
160 unsigned long mask = page_region_mask(offset, length);
161
162 return (mask && (page_private(page) & mask) == mask);
163}
164
165/*
166 * xfs_buf_lru_add - add a buffer to the LRU. 97 * xfs_buf_lru_add - add a buffer to the LRU.
167 * 98 *
168 * The LRU takes a new reference to the buffer so that it will only be freed 99 * The LRU takes a new reference to the buffer so that it will only be freed
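
The page-region code deleted above tracked sub-page uptodate state in page->private, one bit per region. A worked example of the constants it relied on (assuming 4 KiB pages and 64-bit longs):

        /* PAGE_CACHE_SHIFT = 12, BITS_PER_LONG = 64 = 1 << 6          */
        /* PRSHIFT = 12 - 6 = 6                                        */
        /* NBPPR   = 4096 / 64 = 64 bytes per region                   */
        /* => 64 regions per page, one bit of page->private each;
         *    once every bit is set (page_private == ~0UL) the whole
         *    page was marked uptodate                                 */

With the buffer cache now owning its pages outright (alloc_page() rather than the page cache), none of this bookkeeping is needed.
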
@@ -189,7 +120,7 @@ xfs_buf_lru_add(
189 * The unlocked check is safe here because it only occurs when there are not 120 * The unlocked check is safe here because it only occurs when there are not
190 * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there 121 * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there
191 * to optimise the shrinker removing the buffer from the LRU and calling 122 * to optimise the shrinker removing the buffer from the LRU and calling
192 * xfs_buf_free(). i.e. it removes an unneccessary round trip on the 123 * xfs_buf_free(). i.e. it removes an unnecessary round trip on the
193 * bt_lru_lock. 124 * bt_lru_lock.
194 */ 125 */
195STATIC void 126STATIC void
@@ -332,7 +263,7 @@ xfs_buf_free(
332 263
333 ASSERT(list_empty(&bp->b_lru)); 264 ASSERT(list_empty(&bp->b_lru));
334 265
335 if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { 266 if (bp->b_flags & _XBF_PAGES) {
336 uint i; 267 uint i;
337 268
338 if (xfs_buf_is_vmapped(bp)) 269 if (xfs_buf_is_vmapped(bp))
@@ -342,56 +273,77 @@ xfs_buf_free(
342 for (i = 0; i < bp->b_page_count; i++) { 273 for (i = 0; i < bp->b_page_count; i++) {
343 struct page *page = bp->b_pages[i]; 274 struct page *page = bp->b_pages[i];
344 275
345 if (bp->b_flags & _XBF_PAGE_CACHE) 276 __free_page(page);
346 ASSERT(!PagePrivate(page));
347 page_cache_release(page);
348 } 277 }
349 } 278 } else if (bp->b_flags & _XBF_KMEM)
279 kmem_free(bp->b_addr);
350 _xfs_buf_free_pages(bp); 280 _xfs_buf_free_pages(bp);
351 xfs_buf_deallocate(bp); 281 xfs_buf_deallocate(bp);
352} 282}
353 283
354/* 284/*
 355 * Finds all pages for buffer in question and builds its page list. 285 * Allocates all the pages for buffer in question and builds its page list.
356 */ 286 */
357STATIC int 287STATIC int
358_xfs_buf_lookup_pages( 288xfs_buf_allocate_memory(
359 xfs_buf_t *bp, 289 xfs_buf_t *bp,
360 uint flags) 290 uint flags)
361{ 291{
362 struct address_space *mapping = bp->b_target->bt_mapping;
363 size_t blocksize = bp->b_target->bt_bsize;
364 size_t size = bp->b_count_desired; 292 size_t size = bp->b_count_desired;
365 size_t nbytes, offset; 293 size_t nbytes, offset;
366 gfp_t gfp_mask = xb_to_gfp(flags); 294 gfp_t gfp_mask = xb_to_gfp(flags);
367 unsigned short page_count, i; 295 unsigned short page_count, i;
368 pgoff_t first;
369 xfs_off_t end; 296 xfs_off_t end;
370 int error; 297 int error;
371 298
299 /*
300 * for buffers that are contained within a single page, just allocate
301 * the memory from the heap - there's no need for the complexity of
302 * page arrays to keep allocation down to order 0.
303 */
304 if (bp->b_buffer_length < PAGE_SIZE) {
305 bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags));
306 if (!bp->b_addr) {
307 /* low memory - use alloc_page loop instead */
308 goto use_alloc_page;
309 }
310
311 if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) &
312 PAGE_MASK) !=
313 ((unsigned long)bp->b_addr & PAGE_MASK)) {
314 /* b_addr spans two pages - use alloc_page instead */
315 kmem_free(bp->b_addr);
316 bp->b_addr = NULL;
317 goto use_alloc_page;
318 }
319 bp->b_offset = offset_in_page(bp->b_addr);
320 bp->b_pages = bp->b_page_array;
321 bp->b_pages[0] = virt_to_page(bp->b_addr);
322 bp->b_page_count = 1;
323 bp->b_flags |= XBF_MAPPED | _XBF_KMEM;
324 return 0;
325 }
326
327use_alloc_page:
372 end = bp->b_file_offset + bp->b_buffer_length; 328 end = bp->b_file_offset + bp->b_buffer_length;
373 page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset); 329 page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);
374
375 error = _xfs_buf_get_pages(bp, page_count, flags); 330 error = _xfs_buf_get_pages(bp, page_count, flags);
376 if (unlikely(error)) 331 if (unlikely(error))
377 return error; 332 return error;
378 bp->b_flags |= _XBF_PAGE_CACHE;
379 333
380 offset = bp->b_offset; 334 offset = bp->b_offset;
381 first = bp->b_file_offset >> PAGE_CACHE_SHIFT; 335 bp->b_flags |= _XBF_PAGES;
382 336
383 for (i = 0; i < bp->b_page_count; i++) { 337 for (i = 0; i < bp->b_page_count; i++) {
384 struct page *page; 338 struct page *page;
385 uint retries = 0; 339 uint retries = 0;
386 340retry:
387 retry: 341 page = alloc_page(gfp_mask);
388 page = find_or_create_page(mapping, first + i, gfp_mask);
389 if (unlikely(page == NULL)) { 342 if (unlikely(page == NULL)) {
390 if (flags & XBF_READ_AHEAD) { 343 if (flags & XBF_READ_AHEAD) {
391 bp->b_page_count = i; 344 bp->b_page_count = i;
392 for (i = 0; i < bp->b_page_count; i++) 345 error = ENOMEM;
393 unlock_page(bp->b_pages[i]); 346 goto out_free_pages;
394 return -ENOMEM;
395 } 347 }
396 348
397 /* 349 /*
@@ -401,9 +353,8 @@ _xfs_buf_lookup_pages(
401 * handle buffer allocation failures we can't do much. 353 * handle buffer allocation failures we can't do much.
402 */ 354 */
403 if (!(++retries % 100)) 355 if (!(++retries % 100))
404 printk(KERN_ERR 356 xfs_err(NULL,
405 "XFS: possible memory allocation " 357 "possible memory allocation deadlock in %s (mode:0x%x)",
406 "deadlock in %s (mode:0x%x)\n",
407 __func__, gfp_mask); 358 __func__, gfp_mask);
408 359
409 XFS_STATS_INC(xb_page_retries); 360 XFS_STATS_INC(xb_page_retries);
@@ -413,52 +364,44 @@ _xfs_buf_lookup_pages(
413 364
414 XFS_STATS_INC(xb_page_found); 365 XFS_STATS_INC(xb_page_found);
415 366
416 nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset); 367 nbytes = min_t(size_t, size, PAGE_SIZE - offset);
417 size -= nbytes; 368 size -= nbytes;
418
419 ASSERT(!PagePrivate(page));
420 if (!PageUptodate(page)) {
421 page_count--;
422 if (blocksize >= PAGE_CACHE_SIZE) {
423 if (flags & XBF_READ)
424 bp->b_flags |= _XBF_PAGE_LOCKED;
425 } else if (!PagePrivate(page)) {
426 if (test_page_region(page, offset, nbytes))
427 page_count++;
428 }
429 }
430
431 bp->b_pages[i] = page; 369 bp->b_pages[i] = page;
432 offset = 0; 370 offset = 0;
433 } 371 }
372 return 0;
434 373
435 if (!(bp->b_flags & _XBF_PAGE_LOCKED)) { 374out_free_pages:
436 for (i = 0; i < bp->b_page_count; i++) 375 for (i = 0; i < bp->b_page_count; i++)
437 unlock_page(bp->b_pages[i]); 376 __free_page(bp->b_pages[i]);
438 }
439
440 if (page_count == bp->b_page_count)
441 bp->b_flags |= XBF_DONE;
442
443 return error; 377 return error;
444} 378}
445 379
446/* 380/*
447 * Map buffer into kernel address-space if nessecary. 381 * Map buffer into kernel address-space if necessary.
448 */ 382 */
449STATIC int 383STATIC int
450_xfs_buf_map_pages( 384_xfs_buf_map_pages(
451 xfs_buf_t *bp, 385 xfs_buf_t *bp,
452 uint flags) 386 uint flags)
453{ 387{
454 /* A single page buffer is always mappable */ 388 ASSERT(bp->b_flags & _XBF_PAGES);
455 if (bp->b_page_count == 1) { 389 if (bp->b_page_count == 1) {
390 /* A single page buffer is always mappable */
456 bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; 391 bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
457 bp->b_flags |= XBF_MAPPED; 392 bp->b_flags |= XBF_MAPPED;
458 } else if (flags & XBF_MAPPED) { 393 } else if (flags & XBF_MAPPED) {
459 bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, 394 int retried = 0;
460 -1, PAGE_KERNEL); 395
461 if (unlikely(bp->b_addr == NULL)) 396 do {
397 bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
398 -1, PAGE_KERNEL);
399 if (bp->b_addr)
400 break;
401 vm_unmap_aliases();
402 } while (retried++ <= 1);
403
404 if (!bp->b_addr)
462 return -ENOMEM; 405 return -ENOMEM;
463 bp->b_addr += bp->b_offset; 406 bp->b_addr += bp->b_offset;
464 bp->b_flags |= XBF_MAPPED; 407 bp->b_flags |= XBF_MAPPED;
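
Two details of the new allocation path are worth spelling out. First, the heap path must bail out if the kmalloc'ed memory straddles a page boundary, because the buffer is later addressed through a single b_pages[] entry. A minimal sketch of that check (hypothetical helper, same arithmetic as the hunk):

        static bool spans_two_pages(void *addr, size_t len)
        {
                /* compare the page containing the first and last byte */
                return (((unsigned long)addr + len - 1) & PAGE_MASK) !=
                       ((unsigned long)addr & PAGE_MASK);
        }

Second, when vm_map_ram() fails, the retry first calls vm_unmap_aliases() to flush lazily-unmapped vmap areas and reclaim virtual address space before trying once more.
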
@@ -569,9 +512,14 @@ found:
569 } 512 }
570 } 513 }
571 514
515 /*
516 * if the buffer is stale, clear all the external state associated with
517 * it. We need to keep flags such as how we allocated the buffer memory
518 * intact here.
519 */
572 if (bp->b_flags & XBF_STALE) { 520 if (bp->b_flags & XBF_STALE) {
573 ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); 521 ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
574 bp->b_flags &= XBF_MAPPED; 522 bp->b_flags &= XBF_MAPPED | _XBF_KMEM | _XBF_PAGES;
575 } 523 }
576 524
577 trace_xfs_buf_find(bp, flags, _RET_IP_); 525 trace_xfs_buf_find(bp, flags, _RET_IP_);
@@ -592,7 +540,7 @@ xfs_buf_get(
592 xfs_buf_flags_t flags) 540 xfs_buf_flags_t flags)
593{ 541{
594 xfs_buf_t *bp, *new_bp; 542 xfs_buf_t *bp, *new_bp;
595 int error = 0, i; 543 int error = 0;
596 544
597 new_bp = xfs_buf_allocate(flags); 545 new_bp = xfs_buf_allocate(flags);
598 if (unlikely(!new_bp)) 546 if (unlikely(!new_bp))
@@ -600,7 +548,7 @@ xfs_buf_get(
600 548
601 bp = _xfs_buf_find(target, ioff, isize, flags, new_bp); 549 bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
602 if (bp == new_bp) { 550 if (bp == new_bp) {
603 error = _xfs_buf_lookup_pages(bp, flags); 551 error = xfs_buf_allocate_memory(bp, flags);
604 if (error) 552 if (error)
605 goto no_buffer; 553 goto no_buffer;
606 } else { 554 } else {
@@ -609,14 +557,11 @@ xfs_buf_get(
609 return NULL; 557 return NULL;
610 } 558 }
611 559
612 for (i = 0; i < bp->b_page_count; i++)
613 mark_page_accessed(bp->b_pages[i]);
614
615 if (!(bp->b_flags & XBF_MAPPED)) { 560 if (!(bp->b_flags & XBF_MAPPED)) {
616 error = _xfs_buf_map_pages(bp, flags); 561 error = _xfs_buf_map_pages(bp, flags);
617 if (unlikely(error)) { 562 if (unlikely(error)) {
618 printk(KERN_WARNING "%s: failed to map pages\n", 563 xfs_warn(target->bt_mount,
619 __func__); 564 "%s: failed to map pages\n", __func__);
620 goto no_buffer; 565 goto no_buffer;
621 } 566 }
622 } 567 }
@@ -710,10 +655,7 @@ xfs_buf_readahead(
710 xfs_off_t ioff, 655 xfs_off_t ioff,
711 size_t isize) 656 size_t isize)
712{ 657{
713 struct backing_dev_info *bdi; 658 if (bdi_read_congested(target->bt_bdi))
714
715 bdi = target->bt_mapping->backing_dev_info;
716 if (bdi_read_congested(bdi))
717 return; 659 return;
718 660
719 xfs_buf_read(target, ioff, isize, 661 xfs_buf_read(target, ioff, isize,
@@ -791,10 +733,10 @@ xfs_buf_associate_memory(
791 size_t buflen; 733 size_t buflen;
792 int page_count; 734 int page_count;
793 735
794 pageaddr = (unsigned long)mem & PAGE_CACHE_MASK; 736 pageaddr = (unsigned long)mem & PAGE_MASK;
795 offset = (unsigned long)mem - pageaddr; 737 offset = (unsigned long)mem - pageaddr;
796 buflen = PAGE_CACHE_ALIGN(len + offset); 738 buflen = PAGE_ALIGN(len + offset);
797 page_count = buflen >> PAGE_CACHE_SHIFT; 739 page_count = buflen >> PAGE_SHIFT;
798 740
799 /* Free any previous set of page pointers */ 741 /* Free any previous set of page pointers */
800 if (bp->b_pages) 742 if (bp->b_pages)
@@ -811,13 +753,12 @@ xfs_buf_associate_memory(
811 753
812 for (i = 0; i < bp->b_page_count; i++) { 754 for (i = 0; i < bp->b_page_count; i++) {
813 bp->b_pages[i] = mem_to_page((void *)pageaddr); 755 bp->b_pages[i] = mem_to_page((void *)pageaddr);
814 pageaddr += PAGE_CACHE_SIZE; 756 pageaddr += PAGE_SIZE;
815 } 757 }
816 758
817 bp->b_count_desired = len; 759 bp->b_count_desired = len;
818 bp->b_buffer_length = buflen; 760 bp->b_buffer_length = buflen;
819 bp->b_flags |= XBF_MAPPED; 761 bp->b_flags |= XBF_MAPPED;
820 bp->b_flags &= ~_XBF_PAGE_LOCKED;
821 762
822 return 0; 763 return 0;
823} 764}
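
The PAGE_CACHE_* to PAGE_* switch does not change the arithmetic here. A worked example, assuming 4 KiB pages (the addresses are made up):

        /* mem = 0x12345678, len = 5000:                        */
        /* pageaddr   = mem & PAGE_MASK         = 0x12345000    */
        /* offset     = mem - pageaddr          = 0x678 (1656)  */
        /* buflen     = PAGE_ALIGN(5000 + 1656) = 8192          */
        /* page_count = 8192 >> PAGE_SHIFT      = 2             */
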
@@ -850,8 +791,8 @@ xfs_buf_get_uncached(
850 791
851 error = _xfs_buf_map_pages(bp, XBF_MAPPED); 792 error = _xfs_buf_map_pages(bp, XBF_MAPPED);
852 if (unlikely(error)) { 793 if (unlikely(error)) {
853 printk(KERN_WARNING "%s: failed to map pages\n", 794 xfs_warn(target->bt_mount,
854 __func__); 795 "%s: failed to map pages\n", __func__);
855 goto fail_free_mem; 796 goto fail_free_mem;
856 } 797 }
857 798
@@ -924,20 +865,7 @@ xfs_buf_rele(
924 865
925 866
926/* 867/*
927 * Mutual exclusion on buffers. Locking model: 868 * Lock a buffer object, if it is not already locked.
928 *
929 * Buffers associated with inodes for which buffer locking
930 * is not enabled are not protected by semaphores, and are
931 * assumed to be exclusively owned by the caller. There is a
932 * spinlock in the buffer, used by the caller when concurrent
933 * access is possible.
934 */
935
936/*
937 * Locks a buffer object, if it is not already locked. Note that this in
938 * no way locks the underlying pages, so it is only useful for
939 * synchronizing concurrent use of buffer objects, not for synchronizing
940 * independent access to the underlying pages.
941 * 869 *
942 * If we come across a stale, pinned, locked buffer, we know that we are 870 * If we come across a stale, pinned, locked buffer, we know that we are
943 * being asked to lock a buffer that has been reallocated. Because it is 871 * being asked to lock a buffer that has been reallocated. Because it is
@@ -971,10 +899,7 @@ xfs_buf_lock_value(
971} 899}
972 900
973/* 901/*
974 * Locks a buffer object. 902 * Lock a buffer object.
975 * Note that this in no way locks the underlying pages, so it is only
976 * useful for synchronizing concurrent use of buffer objects, not for
977 * synchronizing independent access to the underlying pages.
978 * 903 *
979 * If we come across a stale, pinned, locked buffer, we know that we 904 * If we come across a stale, pinned, locked buffer, we know that we
980 * are being asked to lock a buffer that has been reallocated. Because 905 * are being asked to lock a buffer that has been reallocated. Because
@@ -990,8 +915,6 @@ xfs_buf_lock(
990 915
991 if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) 916 if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
992 xfs_log_force(bp->b_target->bt_mount, 0); 917 xfs_log_force(bp->b_target->bt_mount, 0);
993 if (atomic_read(&bp->b_io_remaining))
994 blk_run_address_space(bp->b_target->bt_mapping);
995 down(&bp->b_sema); 918 down(&bp->b_sema);
996 XB_SET_OWNER(bp); 919 XB_SET_OWNER(bp);
997 920
@@ -1035,9 +958,7 @@ xfs_buf_wait_unpin(
1035 set_current_state(TASK_UNINTERRUPTIBLE); 958 set_current_state(TASK_UNINTERRUPTIBLE);
1036 if (atomic_read(&bp->b_pin_count) == 0) 959 if (atomic_read(&bp->b_pin_count) == 0)
1037 break; 960 break;
1038 if (atomic_read(&bp->b_io_remaining)) 961 io_schedule();
1039 blk_run_address_space(bp->b_target->bt_mapping);
1040 schedule();
1041 } 962 }
1042 remove_wait_queue(&bp->b_waiters, &wait); 963 remove_wait_queue(&bp->b_waiters, &wait);
1043 set_current_state(TASK_RUNNING); 964 set_current_state(TASK_RUNNING);
@@ -1249,10 +1170,8 @@ _xfs_buf_ioend(
1249 xfs_buf_t *bp, 1170 xfs_buf_t *bp,
1250 int schedule) 1171 int schedule)
1251{ 1172{
1252 if (atomic_dec_and_test(&bp->b_io_remaining) == 1) { 1173 if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
1253 bp->b_flags &= ~_XBF_PAGE_LOCKED;
1254 xfs_buf_ioend(bp, schedule); 1174 xfs_buf_ioend(bp, schedule);
1255 }
1256} 1175}
1257 1176
1258STATIC void 1177STATIC void
@@ -1261,35 +1180,12 @@ xfs_buf_bio_end_io(
1261 int error) 1180 int error)
1262{ 1181{
1263 xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; 1182 xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private;
1264 unsigned int blocksize = bp->b_target->bt_bsize;
1265 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1266 1183
1267 xfs_buf_ioerror(bp, -error); 1184 xfs_buf_ioerror(bp, -error);
1268 1185
1269 if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) 1186 if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
1270 invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); 1187 invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
1271 1188
1272 do {
1273 struct page *page = bvec->bv_page;
1274
1275 ASSERT(!PagePrivate(page));
1276 if (unlikely(bp->b_error)) {
1277 if (bp->b_flags & XBF_READ)
1278 ClearPageUptodate(page);
1279 } else if (blocksize >= PAGE_CACHE_SIZE) {
1280 SetPageUptodate(page);
1281 } else if (!PagePrivate(page) &&
1282 (bp->b_flags & _XBF_PAGE_CACHE)) {
1283 set_page_region(page, bvec->bv_offset, bvec->bv_len);
1284 }
1285
1286 if (--bvec >= bio->bi_io_vec)
1287 prefetchw(&bvec->bv_page->flags);
1288
1289 if (bp->b_flags & _XBF_PAGE_LOCKED)
1290 unlock_page(page);
1291 } while (bvec >= bio->bi_io_vec);
1292
1293 _xfs_buf_ioend(bp, 1); 1189 _xfs_buf_ioend(bp, 1);
1294 bio_put(bio); 1190 bio_put(bio);
1295} 1191}
@@ -1303,7 +1199,6 @@ _xfs_buf_ioapply(
1303 int offset = bp->b_offset; 1199 int offset = bp->b_offset;
1304 int size = bp->b_count_desired; 1200 int size = bp->b_count_desired;
1305 sector_t sector = bp->b_bn; 1201 sector_t sector = bp->b_bn;
1306 unsigned int blocksize = bp->b_target->bt_bsize;
1307 1202
1308 total_nr_pages = bp->b_page_count; 1203 total_nr_pages = bp->b_page_count;
1309 map_i = 0; 1204 map_i = 0;
@@ -1324,29 +1219,6 @@ _xfs_buf_ioapply(
1324 (bp->b_flags & XBF_READ_AHEAD) ? READA : READ; 1219 (bp->b_flags & XBF_READ_AHEAD) ? READA : READ;
1325 } 1220 }
1326 1221
1327 /* Special code path for reading a sub page size buffer in --
1328 * we populate up the whole page, and hence the other metadata
1329 * in the same page. This optimization is only valid when the
1330 * filesystem block size is not smaller than the page size.
1331 */
1332 if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&
1333 ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) ==
1334 (XBF_READ|_XBF_PAGE_LOCKED)) &&
1335 (blocksize >= PAGE_CACHE_SIZE)) {
1336 bio = bio_alloc(GFP_NOIO, 1);
1337
1338 bio->bi_bdev = bp->b_target->bt_bdev;
1339 bio->bi_sector = sector - (offset >> BBSHIFT);
1340 bio->bi_end_io = xfs_buf_bio_end_io;
1341 bio->bi_private = bp;
1342
1343 bio_add_page(bio, bp->b_pages[0], PAGE_CACHE_SIZE, 0);
1344 size = 0;
1345
1346 atomic_inc(&bp->b_io_remaining);
1347
1348 goto submit_io;
1349 }
1350 1222
1351next_chunk: 1223next_chunk:
1352 atomic_inc(&bp->b_io_remaining); 1224 atomic_inc(&bp->b_io_remaining);
@@ -1360,8 +1232,9 @@ next_chunk:
1360 bio->bi_end_io = xfs_buf_bio_end_io; 1232 bio->bi_end_io = xfs_buf_bio_end_io;
1361 bio->bi_private = bp; 1233 bio->bi_private = bp;
1362 1234
1235
1363 for (; size && nr_pages; nr_pages--, map_i++) { 1236 for (; size && nr_pages; nr_pages--, map_i++) {
1364 int rbytes, nbytes = PAGE_CACHE_SIZE - offset; 1237 int rbytes, nbytes = PAGE_SIZE - offset;
1365 1238
1366 if (nbytes > size) 1239 if (nbytes > size)
1367 nbytes = size; 1240 nbytes = size;
@@ -1376,7 +1249,6 @@ next_chunk:
1376 total_nr_pages--; 1249 total_nr_pages--;
1377 } 1250 }
1378 1251
1379submit_io:
1380 if (likely(bio->bi_size)) { 1252 if (likely(bio->bi_size)) {
1381 if (xfs_buf_is_vmapped(bp)) { 1253 if (xfs_buf_is_vmapped(bp)) {
1382 flush_kernel_vmap_range(bp->b_addr, 1254 flush_kernel_vmap_range(bp->b_addr,
@@ -1386,18 +1258,7 @@ submit_io:
1386 if (size) 1258 if (size)
1387 goto next_chunk; 1259 goto next_chunk;
1388 } else { 1260 } else {
1389 /*
1390 * if we get here, no pages were added to the bio. However,
1391 * we can't just error out here - if the pages are locked then
1392 * we have to unlock them otherwise we can hang on a later
1393 * access to the page.
1394 */
1395 xfs_buf_ioerror(bp, EIO); 1261 xfs_buf_ioerror(bp, EIO);
1396 if (bp->b_flags & _XBF_PAGE_LOCKED) {
1397 int i;
1398 for (i = 0; i < bp->b_page_count; i++)
1399 unlock_page(bp->b_pages[i]);
1400 }
1401 bio_put(bio); 1262 bio_put(bio);
1402 } 1263 }
1403} 1264}
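
With the sub-page special case deleted, _xfs_buf_ioapply() always takes the generic path: start a bio, pack whole or partial pages into it until the request is satisfied or the bio is full, submit, and loop back to next_chunk for the remainder. A hedged userspace sketch of that packing loop (fixed per-"bio" capacity and all names are illustrative):

#include <stdio.h>

#define PAGE_SIZE	4096
#define BIO_MAX_SEGS	4	/* illustrative per-bio segment limit */

/* pack `size` bytes starting at in-page `offset` into bounded chunks */
static void ioapply(int offset, int size)
{
	while (size > 0) {
		int segs = 0;

		printf("new chunk:\n");
		while (size > 0 && segs < BIO_MAX_SEGS) {
			int nbytes = PAGE_SIZE - offset;  /* rest of page */

			if (nbytes > size)
				nbytes = size;
			printf("  segment: offset=%d len=%d\n", offset, nbytes);
			offset = 0;	/* later pages start at offset 0 */
			size -= nbytes;
			segs++;
		}
		/* submit the chunk here; the remainder starts a new one */
	}
}

int main(void)
{
	ioapply(512, 6 * PAGE_SIZE);	/* unaligned start spans 7 pages */
	return 0;
}
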
@@ -1442,8 +1303,6 @@ xfs_buf_iowait(
1442{ 1303{
1443 trace_xfs_buf_iowait(bp, _RET_IP_); 1304 trace_xfs_buf_iowait(bp, _RET_IP_);
1444 1305
1445 if (atomic_read(&bp->b_io_remaining))
1446 blk_run_address_space(bp->b_target->bt_mapping);
1447 wait_for_completion(&bp->b_iowait); 1306 wait_for_completion(&bp->b_iowait);
1448 1307
1449 trace_xfs_buf_iowait_done(bp, _RET_IP_); 1308 trace_xfs_buf_iowait_done(bp, _RET_IP_);
@@ -1461,8 +1320,8 @@ xfs_buf_offset(
1461 return XFS_BUF_PTR(bp) + offset; 1320 return XFS_BUF_PTR(bp) + offset;
1462 1321
1463 offset += bp->b_offset; 1322 offset += bp->b_offset;
1464 page = bp->b_pages[offset >> PAGE_CACHE_SHIFT]; 1323 page = bp->b_pages[offset >> PAGE_SHIFT];
1465 return (xfs_caddr_t)page_address(page) + (offset & (PAGE_CACHE_SIZE-1)); 1324 return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1));
1466} 1325}
1467 1326
1468/* 1327/*
@@ -1484,9 +1343,9 @@ xfs_buf_iomove(
1484 page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)]; 1343 page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)];
1485 cpoff = xfs_buf_poff(boff + bp->b_offset); 1344 cpoff = xfs_buf_poff(boff + bp->b_offset);
1486 csize = min_t(size_t, 1345 csize = min_t(size_t,
1487 PAGE_CACHE_SIZE-cpoff, bp->b_count_desired-boff); 1346 PAGE_SIZE-cpoff, bp->b_count_desired-boff);
1488 1347
1489 ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE)); 1348 ASSERT(((csize + cpoff) <= PAGE_SIZE));
1490 1349
1491 switch (mode) { 1350 switch (mode) {
1492 case XBRW_ZERO: 1351 case XBRW_ZERO:
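
Both xfs_buf_offset() and xfs_buf_iomove() reduce a byte offset within the buffer to a (page index, offset-in-page) pair, now in PAGE_SIZE rather than PAGE_CACHE_SIZE units: the index is offset >> PAGE_SHIFT, the remainder is offset & (PAGE_SIZE - 1), and copies are clamped so they never cross a page boundary. A small worked example, assuming 4 KiB pages:

#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1 << PAGE_SHIFT)	/* 4096 */

int main(void)
{
	unsigned int offset = 9500;	/* byte offset into the buffer */
	unsigned int index  = offset >> PAGE_SHIFT;	/* page 2 */
	unsigned int poff   = offset & (PAGE_SIZE - 1);	/* 1308 */
	unsigned int csize  = PAGE_SIZE - poff;	/* max copy on this page */

	assert(index == 2 && poff == 1308);
	assert(poff + csize <= PAGE_SIZE);	/* mirrors the ASSERT above */
	printf("page %u, offset %u, up to %u bytes\n", index, poff, csize);
	return 0;
}
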
@@ -1599,7 +1458,6 @@ xfs_free_buftarg(
1599 xfs_flush_buftarg(btp, 1); 1458 xfs_flush_buftarg(btp, 1);
1600 if (mp->m_flags & XFS_MOUNT_BARRIER) 1459 if (mp->m_flags & XFS_MOUNT_BARRIER)
1601 xfs_blkdev_issue_flush(btp); 1460 xfs_blkdev_issue_flush(btp);
1602 iput(btp->bt_mapping->host);
1603 1461
1604 kthread_stop(btp->bt_task); 1462 kthread_stop(btp->bt_task);
1605 kmem_free(btp); 1463 kmem_free(btp);
@@ -1617,21 +1475,12 @@ xfs_setsize_buftarg_flags(
1617 btp->bt_smask = sectorsize - 1; 1475 btp->bt_smask = sectorsize - 1;
1618 1476
1619 if (set_blocksize(btp->bt_bdev, sectorsize)) { 1477 if (set_blocksize(btp->bt_bdev, sectorsize)) {
1620 printk(KERN_WARNING 1478 xfs_warn(btp->bt_mount,
1621 "XFS: Cannot set_blocksize to %u on device %s\n", 1479 "Cannot set_blocksize to %u on device %s\n",
1622 sectorsize, XFS_BUFTARG_NAME(btp)); 1480 sectorsize, XFS_BUFTARG_NAME(btp));
1623 return EINVAL; 1481 return EINVAL;
1624 } 1482 }
1625 1483
1626 if (verbose &&
1627 (PAGE_CACHE_SIZE / BITS_PER_LONG) > sectorsize) {
1628 printk(KERN_WARNING
1629 "XFS: %u byte sectors in use on device %s. "
1630 "This is suboptimal; %u or greater is ideal.\n",
1631 sectorsize, XFS_BUFTARG_NAME(btp),
1632 (unsigned int)PAGE_CACHE_SIZE / BITS_PER_LONG);
1633 }
1634
1635 return 0; 1484 return 0;
1636} 1485}
1637 1486
@@ -1646,7 +1495,7 @@ xfs_setsize_buftarg_early(
1646 struct block_device *bdev) 1495 struct block_device *bdev)
1647{ 1496{
1648 return xfs_setsize_buftarg_flags(btp, 1497 return xfs_setsize_buftarg_flags(btp,
1649 PAGE_CACHE_SIZE, bdev_logical_block_size(bdev), 0); 1498 PAGE_SIZE, bdev_logical_block_size(bdev), 0);
1650} 1499}
1651 1500
1652int 1501int
@@ -1659,41 +1508,6 @@ xfs_setsize_buftarg(
1659} 1508}
1660 1509
1661STATIC int 1510STATIC int
1662xfs_mapping_buftarg(
1663 xfs_buftarg_t *btp,
1664 struct block_device *bdev)
1665{
1666 struct backing_dev_info *bdi;
1667 struct inode *inode;
1668 struct address_space *mapping;
1669 static const struct address_space_operations mapping_aops = {
1670 .sync_page = block_sync_page,
1671 .migratepage = fail_migrate_page,
1672 };
1673
1674 inode = new_inode(bdev->bd_inode->i_sb);
1675 if (!inode) {
1676 printk(KERN_WARNING
1677 "XFS: Cannot allocate mapping inode for device %s\n",
1678 XFS_BUFTARG_NAME(btp));
1679 return ENOMEM;
1680 }
1681 inode->i_ino = get_next_ino();
1682 inode->i_mode = S_IFBLK;
1683 inode->i_bdev = bdev;
1684 inode->i_rdev = bdev->bd_dev;
1685 bdi = blk_get_backing_dev_info(bdev);
1686 if (!bdi)
1687 bdi = &default_backing_dev_info;
1688 mapping = &inode->i_data;
1689 mapping->a_ops = &mapping_aops;
1690 mapping->backing_dev_info = bdi;
1691 mapping_set_gfp_mask(mapping, GFP_NOFS);
1692 btp->bt_mapping = mapping;
1693 return 0;
1694}
1695
1696STATIC int
1697xfs_alloc_delwrite_queue( 1511xfs_alloc_delwrite_queue(
1698 xfs_buftarg_t *btp, 1512 xfs_buftarg_t *btp,
1699 const char *fsname) 1513 const char *fsname)
@@ -1721,12 +1535,14 @@ xfs_alloc_buftarg(
1721 btp->bt_mount = mp; 1535 btp->bt_mount = mp;
1722 btp->bt_dev = bdev->bd_dev; 1536 btp->bt_dev = bdev->bd_dev;
1723 btp->bt_bdev = bdev; 1537 btp->bt_bdev = bdev;
1538 btp->bt_bdi = blk_get_backing_dev_info(bdev);
1539 if (!btp->bt_bdi)
1540 goto error;
1541
1724 INIT_LIST_HEAD(&btp->bt_lru); 1542 INIT_LIST_HEAD(&btp->bt_lru);
1725 spin_lock_init(&btp->bt_lru_lock); 1543 spin_lock_init(&btp->bt_lru_lock);
1726 if (xfs_setsize_buftarg_early(btp, bdev)) 1544 if (xfs_setsize_buftarg_early(btp, bdev))
1727 goto error; 1545 goto error;
1728 if (xfs_mapping_buftarg(btp, bdev))
1729 goto error;
1730 if (xfs_alloc_delwrite_queue(btp, fsname)) 1546 if (xfs_alloc_delwrite_queue(btp, fsname))
1731 goto error; 1547 goto error;
1732 btp->bt_shrinker.shrink = xfs_buftarg_shrink; 1548 btp->bt_shrinker.shrink = xfs_buftarg_shrink;
@@ -1923,8 +1739,8 @@ xfsbufd(
1923 do { 1739 do {
1924 long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); 1740 long age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
1925 long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); 1741 long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
1926 int count = 0;
1927 struct list_head tmp; 1742 struct list_head tmp;
1743 struct blk_plug plug;
1928 1744
1929 if (unlikely(freezing(current))) { 1745 if (unlikely(freezing(current))) {
1930 set_bit(XBT_FORCE_SLEEP, &target->bt_flags); 1746 set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
@@ -1940,16 +1756,15 @@ xfsbufd(
1940 1756
1941 xfs_buf_delwri_split(target, &tmp, age); 1757 xfs_buf_delwri_split(target, &tmp, age);
1942 list_sort(NULL, &tmp, xfs_buf_cmp); 1758 list_sort(NULL, &tmp, xfs_buf_cmp);
1759
1760 blk_start_plug(&plug);
1943 while (!list_empty(&tmp)) { 1761 while (!list_empty(&tmp)) {
1944 struct xfs_buf *bp; 1762 struct xfs_buf *bp;
1945 bp = list_first_entry(&tmp, struct xfs_buf, b_list); 1763 bp = list_first_entry(&tmp, struct xfs_buf, b_list);
1946 list_del_init(&bp->b_list); 1764 list_del_init(&bp->b_list);
1947 xfs_bdstrat_cb(bp); 1765 xfs_bdstrat_cb(bp);
1948 count++;
1949 } 1766 }
1950 if (count) 1767 blk_finish_plug(&plug);
1951 blk_run_address_space(target->bt_mapping);
1952
1953 } while (!kthread_should_stop()); 1768 } while (!kthread_should_stop());
1954 1769
1955 return 0; 1770 return 0;
@@ -1969,6 +1784,7 @@ xfs_flush_buftarg(
1969 int pincount = 0; 1784 int pincount = 0;
1970 LIST_HEAD(tmp_list); 1785 LIST_HEAD(tmp_list);
1971 LIST_HEAD(wait_list); 1786 LIST_HEAD(wait_list);
1787 struct blk_plug plug;
1972 1788
1973 xfs_buf_runall_queues(xfsconvertd_workqueue); 1789 xfs_buf_runall_queues(xfsconvertd_workqueue);
1974 xfs_buf_runall_queues(xfsdatad_workqueue); 1790 xfs_buf_runall_queues(xfsdatad_workqueue);
@@ -1983,6 +1799,8 @@ xfs_flush_buftarg(
1983 * we do that after issuing all the IO. 1799 * we do that after issuing all the IO.
1984 */ 1800 */
1985 list_sort(NULL, &tmp_list, xfs_buf_cmp); 1801 list_sort(NULL, &tmp_list, xfs_buf_cmp);
1802
1803 blk_start_plug(&plug);
1986 while (!list_empty(&tmp_list)) { 1804 while (!list_empty(&tmp_list)) {
1987 bp = list_first_entry(&tmp_list, struct xfs_buf, b_list); 1805 bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
1988 ASSERT(target == bp->b_target); 1806 ASSERT(target == bp->b_target);
@@ -1993,10 +1811,10 @@ xfs_flush_buftarg(
1993 } 1811 }
1994 xfs_bdstrat_cb(bp); 1812 xfs_bdstrat_cb(bp);
1995 } 1813 }
1814 blk_finish_plug(&plug);
1996 1815
1997 if (wait) { 1816 if (wait) {
1998 /* Expedite and wait for IO to complete. */ 1817 /* Wait for IO to complete. */
1999 blk_run_address_space(target->bt_mapping);
2000 while (!list_empty(&wait_list)) { 1818 while (!list_empty(&wait_list)) {
2001 bp = list_first_entry(&wait_list, struct xfs_buf, b_list); 1819 bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
2002 1820
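
Both submission loops above (xfsbufd and xfs_flush_buftarg) move from blk_run_address_space(), which needed the now-removed bt_mapping, to explicit block plugging: blk_start_plug() makes request submission on the current task queue up locally, and blk_finish_plug() releases the whole batch to the device, so the ordering established by list_sort() reaches the elevator intact. The shape of the pattern, reduced to a runnable model where "plugging" is local buffering and "finish" is the flush (all names illustrative):

#include <stdio.h>

struct plug {
	long reqs[16];
	int nr;
};

static void start_plug(struct plug *pl)	{ pl->nr = 0; }

static void submit(struct plug *pl, long sector)
{
	pl->reqs[pl->nr++] = sector;	/* queued locally, not yet issued */
}

static void finish_plug(struct plug *pl)
{
	/* the whole sorted batch reaches the "device" in one go */
	for (int i = 0; i < pl->nr; i++)
		printf("issue sector %ld\n", pl->reqs[i]);
	pl->nr = 0;
}

int main(void)
{
	struct plug pl;

	start_plug(&pl);
	submit(&pl, 8);		/* sorted submission order is preserved */
	submit(&pl, 72);
	submit(&pl, 640);
	finish_plug(&pl);
	return 0;
}
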
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index cbe65950e524..a9a1c4512645 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -61,30 +61,11 @@ typedef enum {
61#define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */ 61#define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */
62 62
63/* flags used only internally */ 63/* flags used only internally */
64#define _XBF_PAGE_CACHE (1 << 17)/* backed by pagecache */
65#define _XBF_PAGES (1 << 18)/* backed by refcounted pages */ 64#define _XBF_PAGES (1 << 18)/* backed by refcounted pages */
66#define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */ 65#define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */
66#define _XBF_KMEM (1 << 20)/* backed by heap memory */
67#define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */ 67#define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */
68 68
69/*
70 * Special flag for supporting metadata blocks smaller than a FSB.
71 *
72 * In this case we can have multiple xfs_buf_t on a single page and
73 * need to lock out concurrent xfs_buf_t readers as they only
74 * serialise access to the buffer.
75 *
76 * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation
77 * between reads of the page. Hence we can have one thread read the
78 * page and modify it, but then race with another thread that thinks
79 * the page is not up-to-date and hence reads it again.
80 *
81 * The result is that the first modifcation to the page is lost.
82 * This sort of AGF/AGI reading race can happen when unlinking inodes
83 * that require truncation and results in the AGI unlinked list
84 * modifications being lost.
85 */
86#define _XBF_PAGE_LOCKED (1 << 22)
87
88typedef unsigned int xfs_buf_flags_t; 69typedef unsigned int xfs_buf_flags_t;
89 70
90#define XFS_BUF_FLAGS \ 71#define XFS_BUF_FLAGS \
@@ -100,12 +81,10 @@ typedef unsigned int xfs_buf_flags_t;
100 { XBF_LOCK, "LOCK" }, /* should never be set */\ 81 { XBF_LOCK, "LOCK" }, /* should never be set */\
101 { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\ 82 { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\
102 { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\ 83 { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\
103 { _XBF_PAGE_CACHE, "PAGE_CACHE" }, \
104 { _XBF_PAGES, "PAGES" }, \ 84 { _XBF_PAGES, "PAGES" }, \
105 { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \ 85 { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \
106 { _XBF_DELWRI_Q, "DELWRI_Q" }, \ 86 { _XBF_KMEM, "KMEM" }, \
107 { _XBF_PAGE_LOCKED, "PAGE_LOCKED" } 87 { _XBF_DELWRI_Q, "DELWRI_Q" }
108
109 88
110typedef enum { 89typedef enum {
111 XBT_FORCE_SLEEP = 0, 90 XBT_FORCE_SLEEP = 0,
@@ -120,7 +99,7 @@ typedef struct xfs_bufhash {
120typedef struct xfs_buftarg { 99typedef struct xfs_buftarg {
121 dev_t bt_dev; 100 dev_t bt_dev;
122 struct block_device *bt_bdev; 101 struct block_device *bt_bdev;
123 struct address_space *bt_mapping; 102 struct backing_dev_info *bt_bdi;
124 struct xfs_mount *bt_mount; 103 struct xfs_mount *bt_mount;
125 unsigned int bt_bsize; 104 unsigned int bt_bsize;
126 unsigned int bt_sshift; 105 unsigned int bt_sshift;
@@ -139,17 +118,6 @@ typedef struct xfs_buftarg {
139 unsigned int bt_lru_nr; 118 unsigned int bt_lru_nr;
140} xfs_buftarg_t; 119} xfs_buftarg_t;
141 120
142/*
143 * xfs_buf_t: Buffer structure for pagecache-based buffers
144 *
145 * This buffer structure is used by the pagecache buffer management routines
146 * to refer to an assembly of pages forming a logical buffer.
147 *
148 * The buffer structure is used on a temporary basis only, and discarded when
149 * released. The real data storage is recorded in the pagecache. Buffers are
150 * hashed to the block device on which the file system resides.
151 */
152
153struct xfs_buf; 121struct xfs_buf;
154typedef void (*xfs_buf_iodone_t)(struct xfs_buf *); 122typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
155 123
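
The XFS_BUF_FLAGS list edited above is a flag-to-name table consumed by the tracing code; dropping _XBF_PAGE_CACHE and _XBF_PAGE_LOCKED and adding _XBF_KMEM keeps it in step with the bit definitions. A minimal decoder over such a table (the flag values mirror the header; the decoder itself is illustrative):

#include <stdio.h>

#define _XBF_PAGES	(1 << 18)	/* backed by refcounted pages */
#define _XBF_KMEM	(1 << 20)	/* backed by heap memory */
#define _XBF_DELWRI_Q	(1 << 21)	/* buffer on delwri queue */

static const struct { unsigned int mask; const char *name; } flag_names[] = {
	{ _XBF_PAGES,		"PAGES" },
	{ _XBF_KMEM,		"KMEM" },
	{ _XBF_DELWRI_Q,	"DELWRI_Q" },
};

static void print_flags(unsigned int flags)
{
	for (size_t i = 0; i < sizeof(flag_names) / sizeof(flag_names[0]); i++)
		if (flags & flag_names[i].mask)
			printf("%s ", flag_names[i].name);
	printf("\n");
}

int main(void)
{
	print_flags(_XBF_PAGES | _XBF_DELWRI_Q);	/* "PAGES DELWRI_Q" */
	return 0;
}
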
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index a55c1b46b219..f4213ba1ff85 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -381,7 +381,7 @@ xfs_aio_write_isize_update(
381 381
382/* 382/*
383 * If this was a direct or synchronous I/O that failed (such as ENOSPC) then 383 * If this was a direct or synchronous I/O that failed (such as ENOSPC) then
384 * part of the I/O may have been written to disk before the error occured. In 384 * part of the I/O may have been written to disk before the error occurred. In
385 * this case the on-disk file size may have been adjusted beyond the in-memory 385 * this case the on-disk file size may have been adjusted beyond the in-memory
386 * file size and now needs to be truncated back. 386 * file size and now needs to be truncated back.
387 */ 387 */
@@ -896,6 +896,7 @@ xfs_file_fallocate(
896 xfs_flock64_t bf; 896 xfs_flock64_t bf;
897 xfs_inode_t *ip = XFS_I(inode); 897 xfs_inode_t *ip = XFS_I(inode);
898 int cmd = XFS_IOC_RESVSP; 898 int cmd = XFS_IOC_RESVSP;
899 int attr_flags = XFS_ATTR_NOLOCK;
899 900
900 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 901 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
901 return -EOPNOTSUPP; 902 return -EOPNOTSUPP;
@@ -918,7 +919,10 @@ xfs_file_fallocate(
918 goto out_unlock; 919 goto out_unlock;
919 } 920 }
920 921
921 error = -xfs_change_file_space(ip, cmd, &bf, 0, XFS_ATTR_NOLOCK); 922 if (file->f_flags & O_DSYNC)
923 attr_flags |= XFS_ATTR_SYNC;
924
925 error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags);
922 if (error) 926 if (error)
923 goto out_unlock; 927 goto out_unlock;
924 928
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 0ca0e3c024d7..acca2c5ca3fa 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -624,6 +624,10 @@ xfs_ioc_space(
624 624
625 if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) 625 if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
626 attr_flags |= XFS_ATTR_NONBLOCK; 626 attr_flags |= XFS_ATTR_NONBLOCK;
627
628 if (filp->f_flags & O_DSYNC)
629 attr_flags |= XFS_ATTR_SYNC;
630
627 if (ioflags & IO_INVIS) 631 if (ioflags & IO_INVIS)
628 attr_flags |= XFS_ATTR_DMI; 632 attr_flags |= XFS_ATTR_DMI;
629 633
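
Both the fallocate path and the XFS_IOC_RESVSP ioctl path now inspect the file's open flags and add XFS_ATTR_SYNC when O_DSYNC is set, so a preallocation on a file opened for synchronous data I/O is itself forced to disk. The translation is a plain flag mapping; a tiny model of it (the ATTR_* values below are placeholders, not the kernel's):

#define _POSIX_C_SOURCE 200809L
#include <fcntl.h>
#include <stdio.h>

#define ATTR_NOLOCK	0x01	/* placeholder attribute flags */
#define ATTR_SYNC	0x02
#define ATTR_NONBLOCK	0x04

static int attr_flags_from_open(int f_flags)
{
	int attr_flags = ATTR_NOLOCK;

	if (f_flags & O_DSYNC)		/* mirror synchronous-data opens */
		attr_flags |= ATTR_SYNC;
	if (f_flags & O_NONBLOCK)
		attr_flags |= ATTR_NONBLOCK;
	return attr_flags;
}

int main(void)
{
	printf("0x%x\n", attr_flags_from_open(O_DSYNC));	/* 0x3 */
	return 0;
}
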
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 9ff7fc603d2f..dd21784525a8 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -70,7 +70,7 @@ xfs_synchronize_times(
70 70
71/* 71/*
72 * If the linux inode is valid, mark it dirty. 72 * If the linux inode is valid, mark it dirty.
73 * Used when commiting a dirty inode into a transaction so that 73 * Used when committing a dirty inode into a transaction so that
74 * the inode will get written back by the linux code 74 * the inode will get written back by the linux code
75 */ 75 */
76void 76void
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 096494997747..244be9cbfe78 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -39,7 +39,6 @@
39#include <mrlock.h> 39#include <mrlock.h>
40#include <time.h> 40#include <time.h>
41 41
42#include <support/debug.h>
43#include <support/uuid.h> 42#include <support/uuid.h>
44 43
45#include <linux/semaphore.h> 44#include <linux/semaphore.h>
@@ -86,6 +85,7 @@
86#include <xfs_aops.h> 85#include <xfs_aops.h>
87#include <xfs_super.h> 86#include <xfs_super.h>
88#include <xfs_buf.h> 87#include <xfs_buf.h>
88#include <xfs_message.h>
89 89
90/* 90/*
91 * Feature macros (disable/enable) 91 * Feature macros (disable/enable)
@@ -280,4 +280,25 @@ static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
280#define __arch_pack 280#define __arch_pack
281#endif 281#endif
282 282
283#define ASSERT_ALWAYS(expr) \
284 (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
285
286#ifndef DEBUG
287#define ASSERT(expr) ((void)0)
288
289#ifndef STATIC
290# define STATIC static noinline
291#endif
292
293#else /* DEBUG */
294
295#define ASSERT(expr) \
296 (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
297
298#ifndef STATIC
299# define STATIC noinline
300#endif
301
302#endif /* DEBUG */
303
283#endif /* __XFS_LINUX__ */ 304#endif /* __XFS_LINUX__ */
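
The ASSERT family moves here from the removed support/debug.h: ASSERT_ALWAYS always evaluates its expression and calls assfail() on failure, ASSERT compiles to (void)0 outside DEBUG builds (so the expression is not even evaluated), and STATIC drops `static` under DEBUG so the symbols stay visible to a debugger. A standalone illustration of the conditional-assert pattern, with the kernel's unlikely() hint omitted and assfail() stubbed out:

#include <stdio.h>
#include <stdlib.h>

static void assfail(const char *expr, const char *file, int line)
{
	fprintf(stderr, "Assertion failed: %s, file: %s, line: %d\n",
		expr, file, line);
	abort();	/* the kernel version logs and BUG()s instead */
}

#define ASSERT_ALWAYS(expr) \
	((expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))

#ifdef DEBUG
# define ASSERT(expr)	ASSERT_ALWAYS(expr)
#else
# define ASSERT(expr)	((void)0)	/* compiled out of production builds */
#endif

int main(void)
{
	ASSERT(1 + 1 == 2);		/* no-op unless built with -DDEBUG */
	ASSERT_ALWAYS(2 * 2 == 4);	/* always checked */
	printf("ok\n");
	return 0;
}
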
diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c
new file mode 100644
index 000000000000..9f76cceb678d
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_message.c
@@ -0,0 +1,126 @@
1/*
2 * Copyright (c) 2011 Red Hat, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write the Free Software Foundation,
15 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16 */
17
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_log.h"
22#include "xfs_inum.h"
23#include "xfs_trans.h"
24#include "xfs_sb.h"
25#include "xfs_ag.h"
26#include "xfs_mount.h"
27
28/*
29 * XFS logging functions
30 */
31static void
32__xfs_printk(
33 const char *level,
34 const struct xfs_mount *mp,
35 struct va_format *vaf)
36{
37 if (mp && mp->m_fsname) {
38 printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf);
39 return;
40 }
41 printk("%sXFS: %pV\n", level, vaf);
42}
43
44void xfs_printk(
45 const char *level,
46 const struct xfs_mount *mp,
47 const char *fmt, ...)
48{
49 struct va_format vaf;
50 va_list args;
51
52 va_start(args, fmt);
53
54 vaf.fmt = fmt;
55 vaf.va = &args;
56
57 __xfs_printk(level, mp, &vaf);
58 va_end(args);
59}
60
61#define define_xfs_printk_level(func, kern_level) \
62void func(const struct xfs_mount *mp, const char *fmt, ...) \
63{ \
64 struct va_format vaf; \
65 va_list args; \
66 \
67 va_start(args, fmt); \
68 \
69 vaf.fmt = fmt; \
70 vaf.va = &args; \
71 \
72 __xfs_printk(kern_level, mp, &vaf); \
73 va_end(args); \
74} \
75
76define_xfs_printk_level(xfs_emerg, KERN_EMERG);
77define_xfs_printk_level(xfs_alert, KERN_ALERT);
78define_xfs_printk_level(xfs_crit, KERN_CRIT);
79define_xfs_printk_level(xfs_err, KERN_ERR);
80define_xfs_printk_level(xfs_warn, KERN_WARNING);
81define_xfs_printk_level(xfs_notice, KERN_NOTICE);
82define_xfs_printk_level(xfs_info, KERN_INFO);
83#ifdef DEBUG
84define_xfs_printk_level(xfs_debug, KERN_DEBUG);
85#endif
86
87void
88xfs_alert_tag(
89 const struct xfs_mount *mp,
90 int panic_tag,
91 const char *fmt, ...)
92{
93 struct va_format vaf;
94 va_list args;
95 int do_panic = 0;
96
97 if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
98 xfs_printk(KERN_ALERT, mp,
99 "XFS: Transforming an alert into a BUG.");
100 do_panic = 1;
101 }
102
103 va_start(args, fmt);
104
105 vaf.fmt = fmt;
106 vaf.va = &args;
107
108 __xfs_printk(KERN_ALERT, mp, &vaf);
109 va_end(args);
110
111 BUG_ON(do_panic);
112}
113
114void
115assfail(char *expr, char *file, int line)
116{
117 xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d",
118 expr, file, line);
119 BUG();
120}
121
122void
123xfs_hex_dump(void *p, int length)
124{
125 print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1);
126}
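
xfs_message.c generates one wrapper per log level from a single macro; each wrapper packs its varargs into a struct va_format and the shared __xfs_printk() prefixes the level and filesystem name (the kernel's %pV specifier prints the nested format). A portable model of the same macro-generated wrappers, with vfprintf standing in for %pV (names and level strings here are illustrative; "<4>"/"<5>" match the KERN_WARNING/KERN_NOTICE encodings of this kernel era):

#include <stdarg.h>
#include <stdio.h>

static void __log(const char *level, const char *fsname,
		  const char *fmt, va_list args)
{
	fprintf(stderr, "%sXFS (%s): ", level, fsname);
	vfprintf(stderr, fmt, args);
	fputc('\n', stderr);
}

/* one macro expansion emits a complete variadic wrapper per level */
#define define_log_level(func, level)				\
void func(const char *fsname, const char *fmt, ...)		\
{								\
	va_list args;						\
	va_start(args, fmt);					\
	__log(level, fsname, fmt, args);			\
	va_end(args);						\
}

define_log_level(log_warn, "<4>")
define_log_level(log_notice, "<5>")

int main(void)
{
	log_warn("sda1", "%s option requires an argument", "logbufs");
	return 0;
}
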
diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h
new file mode 100644
index 000000000000..f1b3fc1b6c4e
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_message.h
@@ -0,0 +1,40 @@
1#ifndef __XFS_MESSAGE_H
2#define __XFS_MESSAGE_H 1
3
4struct xfs_mount;
5
6extern void xfs_printk(const char *level, const struct xfs_mount *mp,
7 const char *fmt, ...)
8 __attribute__ ((format (printf, 3, 4)));
9extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
10 __attribute__ ((format (printf, 2, 3)));
11extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
12 __attribute__ ((format (printf, 2, 3)));
13extern void xfs_alert_tag(const struct xfs_mount *mp, int tag,
14 const char *fmt, ...)
15 __attribute__ ((format (printf, 3, 4)));
16extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...)
17 __attribute__ ((format (printf, 2, 3)));
18extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...)
19 __attribute__ ((format (printf, 2, 3)));
20extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...)
21 __attribute__ ((format (printf, 2, 3)));
22extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...)
23 __attribute__ ((format (printf, 2, 3)));
24extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
25 __attribute__ ((format (printf, 2, 3)));
26
27#ifdef DEBUG
28extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
29 __attribute__ ((format (printf, 2, 3)));
30#else
31static inline void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
32{
33}
34#endif
35
36extern void assfail(char *expr, char *f, int l);
37
38extern void xfs_hex_dump(void *p, int length);
39
40#endif /* __XFS_MESSAGE_H */
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 9731898083ae..b38e58d02299 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -173,6 +173,15 @@ xfs_parseargs(
173 __uint8_t iosizelog = 0; 173 __uint8_t iosizelog = 0;
174 174
175 /* 175 /*
176 * set up the mount name first so all the errors will refer to the
177 * correct device.
178 */
179 mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
180 if (!mp->m_fsname)
181 return ENOMEM;
182 mp->m_fsname_len = strlen(mp->m_fsname) + 1;
183
184 /*
176 * Copy binary VFS mount flags we are interested in. 185 * Copy binary VFS mount flags we are interested in.
177 */ 186 */
178 if (sb->s_flags & MS_RDONLY) 187 if (sb->s_flags & MS_RDONLY)
@@ -189,6 +198,7 @@ xfs_parseargs(
189 mp->m_flags |= XFS_MOUNT_BARRIER; 198 mp->m_flags |= XFS_MOUNT_BARRIER;
190 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; 199 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
191 mp->m_flags |= XFS_MOUNT_SMALL_INUMS; 200 mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
201 mp->m_flags |= XFS_MOUNT_DELAYLOG;
192 202
193 /* 203 /*
194 * These can be overridden by the mount option parsing. 204 * These can be overridden by the mount option parsing.
@@ -207,24 +217,21 @@ xfs_parseargs(
207 217
208 if (!strcmp(this_char, MNTOPT_LOGBUFS)) { 218 if (!strcmp(this_char, MNTOPT_LOGBUFS)) {
209 if (!value || !*value) { 219 if (!value || !*value) {
210 cmn_err(CE_WARN, 220 xfs_warn(mp, "%s option requires an argument",
211 "XFS: %s option requires an argument",
212 this_char); 221 this_char);
213 return EINVAL; 222 return EINVAL;
214 } 223 }
215 mp->m_logbufs = simple_strtoul(value, &eov, 10); 224 mp->m_logbufs = simple_strtoul(value, &eov, 10);
216 } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { 225 } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
217 if (!value || !*value) { 226 if (!value || !*value) {
218 cmn_err(CE_WARN, 227 xfs_warn(mp, "%s option requires an argument",
219 "XFS: %s option requires an argument",
220 this_char); 228 this_char);
221 return EINVAL; 229 return EINVAL;
222 } 230 }
223 mp->m_logbsize = suffix_strtoul(value, &eov, 10); 231 mp->m_logbsize = suffix_strtoul(value, &eov, 10);
224 } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { 232 } else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
225 if (!value || !*value) { 233 if (!value || !*value) {
226 cmn_err(CE_WARN, 234 xfs_warn(mp, "%s option requires an argument",
227 "XFS: %s option requires an argument",
228 this_char); 235 this_char);
229 return EINVAL; 236 return EINVAL;
230 } 237 }
@@ -232,14 +239,12 @@ xfs_parseargs(
232 if (!mp->m_logname) 239 if (!mp->m_logname)
233 return ENOMEM; 240 return ENOMEM;
234 } else if (!strcmp(this_char, MNTOPT_MTPT)) { 241 } else if (!strcmp(this_char, MNTOPT_MTPT)) {
235 cmn_err(CE_WARN, 242 xfs_warn(mp, "%s option not allowed on this system",
236 "XFS: %s option not allowed on this system",
237 this_char); 243 this_char);
238 return EINVAL; 244 return EINVAL;
239 } else if (!strcmp(this_char, MNTOPT_RTDEV)) { 245 } else if (!strcmp(this_char, MNTOPT_RTDEV)) {
240 if (!value || !*value) { 246 if (!value || !*value) {
241 cmn_err(CE_WARN, 247 xfs_warn(mp, "%s option requires an argument",
242 "XFS: %s option requires an argument",
243 this_char); 248 this_char);
244 return EINVAL; 249 return EINVAL;
245 } 250 }
@@ -248,8 +253,7 @@ xfs_parseargs(
248 return ENOMEM; 253 return ENOMEM;
249 } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { 254 } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
250 if (!value || !*value) { 255 if (!value || !*value) {
251 cmn_err(CE_WARN, 256 xfs_warn(mp, "%s option requires an argument",
252 "XFS: %s option requires an argument",
253 this_char); 257 this_char);
254 return EINVAL; 258 return EINVAL;
255 } 259 }
@@ -257,8 +261,7 @@ xfs_parseargs(
257 iosizelog = ffs(iosize) - 1; 261 iosizelog = ffs(iosize) - 1;
258 } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { 262 } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
259 if (!value || !*value) { 263 if (!value || !*value) {
260 cmn_err(CE_WARN, 264 xfs_warn(mp, "%s option requires an argument",
261 "XFS: %s option requires an argument",
262 this_char); 265 this_char);
263 return EINVAL; 266 return EINVAL;
264 } 267 }
@@ -280,16 +283,14 @@ xfs_parseargs(
280 mp->m_flags |= XFS_MOUNT_SWALLOC; 283 mp->m_flags |= XFS_MOUNT_SWALLOC;
281 } else if (!strcmp(this_char, MNTOPT_SUNIT)) { 284 } else if (!strcmp(this_char, MNTOPT_SUNIT)) {
282 if (!value || !*value) { 285 if (!value || !*value) {
283 cmn_err(CE_WARN, 286 xfs_warn(mp, "%s option requires an argument",
284 "XFS: %s option requires an argument",
285 this_char); 287 this_char);
286 return EINVAL; 288 return EINVAL;
287 } 289 }
288 dsunit = simple_strtoul(value, &eov, 10); 290 dsunit = simple_strtoul(value, &eov, 10);
289 } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { 291 } else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
290 if (!value || !*value) { 292 if (!value || !*value) {
291 cmn_err(CE_WARN, 293 xfs_warn(mp, "%s option requires an argument",
292 "XFS: %s option requires an argument",
293 this_char); 294 this_char);
294 return EINVAL; 295 return EINVAL;
295 } 296 }
@@ -297,8 +298,7 @@ xfs_parseargs(
297 } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { 298 } else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
298 mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; 299 mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
299#if !XFS_BIG_INUMS 300#if !XFS_BIG_INUMS
300 cmn_err(CE_WARN, 301 xfs_warn(mp, "%s option not allowed on this system",
301 "XFS: %s option not allowed on this system",
302 this_char); 302 this_char);
303 return EINVAL; 303 return EINVAL;
304#endif 304#endif
@@ -356,20 +356,19 @@ xfs_parseargs(
356 } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { 356 } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
357 mp->m_flags &= ~XFS_MOUNT_DELAYLOG; 357 mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
358 } else if (!strcmp(this_char, "ihashsize")) { 358 } else if (!strcmp(this_char, "ihashsize")) {
359 cmn_err(CE_WARN, 359 xfs_warn(mp,
360 "XFS: ihashsize no longer used, option is deprecated."); 360 "ihashsize no longer used, option is deprecated.");
361 } else if (!strcmp(this_char, "osyncisdsync")) { 361 } else if (!strcmp(this_char, "osyncisdsync")) {
362 cmn_err(CE_WARN, 362 xfs_warn(mp,
363 "XFS: osyncisdsync has no effect, option is deprecated."); 363 "osyncisdsync has no effect, option is deprecated.");
364 } else if (!strcmp(this_char, "osyncisosync")) { 364 } else if (!strcmp(this_char, "osyncisosync")) {
365 cmn_err(CE_WARN, 365 xfs_warn(mp,
366 "XFS: osyncisosync has no effect, option is deprecated."); 366 "osyncisosync has no effect, option is deprecated.");
367 } else if (!strcmp(this_char, "irixsgid")) { 367 } else if (!strcmp(this_char, "irixsgid")) {
368 cmn_err(CE_WARN, 368 xfs_warn(mp,
369 "XFS: irixsgid is now a sysctl(2) variable, option is deprecated."); 369 "irixsgid is now a sysctl(2) variable, option is deprecated.");
370 } else { 370 } else {
371 cmn_err(CE_WARN, 371 xfs_warn(mp, "unknown mount option [%s].", this_char);
372 "XFS: unknown mount option [%s].", this_char);
373 return EINVAL; 372 return EINVAL;
374 } 373 }
375 } 374 }
@@ -379,40 +378,37 @@ xfs_parseargs(
379 */ 378 */
380 if ((mp->m_flags & XFS_MOUNT_NORECOVERY) && 379 if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
381 !(mp->m_flags & XFS_MOUNT_RDONLY)) { 380 !(mp->m_flags & XFS_MOUNT_RDONLY)) {
382 cmn_err(CE_WARN, "XFS: no-recovery mounts must be read-only."); 381 xfs_warn(mp, "no-recovery mounts must be read-only.");
383 return EINVAL; 382 return EINVAL;
384 } 383 }
385 384
386 if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) { 385 if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
387 cmn_err(CE_WARN, 386 xfs_warn(mp,
388 "XFS: sunit and swidth options incompatible with the noalign option"); 387 "sunit and swidth options incompatible with the noalign option");
389 return EINVAL; 388 return EINVAL;
390 } 389 }
391 390
392#ifndef CONFIG_XFS_QUOTA 391#ifndef CONFIG_XFS_QUOTA
393 if (XFS_IS_QUOTA_RUNNING(mp)) { 392 if (XFS_IS_QUOTA_RUNNING(mp)) {
394 cmn_err(CE_WARN, 393 xfs_warn(mp, "quota support not available in this kernel.");
395 "XFS: quota support not available in this kernel.");
396 return EINVAL; 394 return EINVAL;
397 } 395 }
398#endif 396#endif
399 397
400 if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && 398 if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
401 (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) { 399 (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
402 cmn_err(CE_WARN, 400 xfs_warn(mp, "cannot mount with both project and group quota");
403 "XFS: cannot mount with both project and group quota");
404 return EINVAL; 401 return EINVAL;
405 } 402 }
406 403
407 if ((dsunit && !dswidth) || (!dsunit && dswidth)) { 404 if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
408 cmn_err(CE_WARN, 405 xfs_warn(mp, "sunit and swidth must be specified together");
409 "XFS: sunit and swidth must be specified together");
410 return EINVAL; 406 return EINVAL;
411 } 407 }
412 408
413 if (dsunit && (dswidth % dsunit != 0)) { 409 if (dsunit && (dswidth % dsunit != 0)) {
414 cmn_err(CE_WARN, 410 xfs_warn(mp,
415 "XFS: stripe width (%d) must be a multiple of the stripe unit (%d)", 411 "stripe width (%d) must be a multiple of the stripe unit (%d)",
416 dswidth, dsunit); 412 dswidth, dsunit);
417 return EINVAL; 413 return EINVAL;
418 } 414 }
@@ -438,8 +434,7 @@ done:
438 mp->m_logbufs != 0 && 434 mp->m_logbufs != 0 &&
439 (mp->m_logbufs < XLOG_MIN_ICLOGS || 435 (mp->m_logbufs < XLOG_MIN_ICLOGS ||
440 mp->m_logbufs > XLOG_MAX_ICLOGS)) { 436 mp->m_logbufs > XLOG_MAX_ICLOGS)) {
441 cmn_err(CE_WARN, 437 xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
442 "XFS: invalid logbufs value: %d [not %d-%d]",
443 mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); 438 mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
444 return XFS_ERROR(EINVAL); 439 return XFS_ERROR(EINVAL);
445 } 440 }
@@ -448,22 +443,16 @@ done:
448 (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE || 443 (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
449 mp->m_logbsize > XLOG_MAX_RECORD_BSIZE || 444 mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
450 !is_power_of_2(mp->m_logbsize))) { 445 !is_power_of_2(mp->m_logbsize))) {
451 cmn_err(CE_WARN, 446 xfs_warn(mp,
452 "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", 447 "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
453 mp->m_logbsize); 448 mp->m_logbsize);
454 return XFS_ERROR(EINVAL); 449 return XFS_ERROR(EINVAL);
455 } 450 }
456 451
457 mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
458 if (!mp->m_fsname)
459 return ENOMEM;
460 mp->m_fsname_len = strlen(mp->m_fsname) + 1;
461
462 if (iosizelog) { 452 if (iosizelog) {
463 if (iosizelog > XFS_MAX_IO_LOG || 453 if (iosizelog > XFS_MAX_IO_LOG ||
464 iosizelog < XFS_MIN_IO_LOG) { 454 iosizelog < XFS_MIN_IO_LOG) {
465 cmn_err(CE_WARN, 455 xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
466 "XFS: invalid log iosize: %d [not %d-%d]",
467 iosizelog, XFS_MIN_IO_LOG, 456 iosizelog, XFS_MIN_IO_LOG,
468 XFS_MAX_IO_LOG); 457 XFS_MAX_IO_LOG);
469 return XFS_ERROR(EINVAL); 458 return XFS_ERROR(EINVAL);
@@ -610,7 +599,7 @@ xfs_blkdev_get(
610 mp); 599 mp);
611 if (IS_ERR(*bdevp)) { 600 if (IS_ERR(*bdevp)) {
612 error = PTR_ERR(*bdevp); 601 error = PTR_ERR(*bdevp);
613 printk("XFS: Invalid device [%s], error=%d\n", name, error); 602 xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error);
614 } 603 }
615 604
616 return -error; 605 return -error;
@@ -664,23 +653,23 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp)
664 int error; 653 int error;
665 654
666 if (mp->m_logdev_targp != mp->m_ddev_targp) { 655 if (mp->m_logdev_targp != mp->m_ddev_targp) {
667 xfs_fs_cmn_err(CE_NOTE, mp, 656 xfs_notice(mp,
668 "Disabling barriers, not supported with external log device"); 657 "Disabling barriers, not supported with external log device");
669 mp->m_flags &= ~XFS_MOUNT_BARRIER; 658 mp->m_flags &= ~XFS_MOUNT_BARRIER;
670 return; 659 return;
671 } 660 }
672 661
673 if (xfs_readonly_buftarg(mp->m_ddev_targp)) { 662 if (xfs_readonly_buftarg(mp->m_ddev_targp)) {
674 xfs_fs_cmn_err(CE_NOTE, mp, 663 xfs_notice(mp,
675 "Disabling barriers, underlying device is readonly"); 664 "Disabling barriers, underlying device is readonly");
676 mp->m_flags &= ~XFS_MOUNT_BARRIER; 665 mp->m_flags &= ~XFS_MOUNT_BARRIER;
677 return; 666 return;
678 } 667 }
679 668
680 error = xfs_barrier_test(mp); 669 error = xfs_barrier_test(mp);
681 if (error) { 670 if (error) {
682 xfs_fs_cmn_err(CE_NOTE, mp, 671 xfs_notice(mp,
683 "Disabling barriers, trial barrier write failed"); 672 "Disabling barriers, trial barrier write failed");
684 mp->m_flags &= ~XFS_MOUNT_BARRIER; 673 mp->m_flags &= ~XFS_MOUNT_BARRIER;
685 return; 674 return;
686 } 675 }
@@ -743,8 +732,8 @@ xfs_open_devices(
743 goto out_close_logdev; 732 goto out_close_logdev;
744 733
745 if (rtdev == ddev || rtdev == logdev) { 734 if (rtdev == ddev || rtdev == logdev) {
746 cmn_err(CE_WARN, 735 xfs_warn(mp,
747 "XFS: Cannot mount filesystem with identical rtdev and ddev/logdev."); 736 "Cannot mount filesystem with identical rtdev and ddev/logdev.");
748 error = EINVAL; 737 error = EINVAL;
749 goto out_close_rtdev; 738 goto out_close_rtdev;
750 } 739 }
@@ -827,75 +816,6 @@ xfs_setup_devices(
827 return 0; 816 return 0;
828} 817}
829 818
830/*
831 * XFS AIL push thread support
832 */
833void
834xfsaild_wakeup(
835 struct xfs_ail *ailp,
836 xfs_lsn_t threshold_lsn)
837{
838 /* only ever move the target forwards */
839 if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) {
840 ailp->xa_target = threshold_lsn;
841 wake_up_process(ailp->xa_task);
842 }
843}
844
845STATIC int
846xfsaild(
847 void *data)
848{
849 struct xfs_ail *ailp = data;
850 xfs_lsn_t last_pushed_lsn = 0;
851 long tout = 0; /* milliseconds */
852
853 while (!kthread_should_stop()) {
854 /*
855 * for short sleeps indicating congestion, don't allow us to
856 * get woken early. Otherwise all we do is bang on the AIL lock
857 * without making progress.
858 */
859 if (tout && tout <= 20)
860 __set_current_state(TASK_KILLABLE);
861 else
862 __set_current_state(TASK_INTERRUPTIBLE);
863 schedule_timeout(tout ?
864 msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
865
866 /* swsusp */
867 try_to_freeze();
868
869 ASSERT(ailp->xa_mount->m_log);
870 if (XFS_FORCED_SHUTDOWN(ailp->xa_mount))
871 continue;
872
873 tout = xfsaild_push(ailp, &last_pushed_lsn);
874 }
875
876 return 0;
877} /* xfsaild */
878
879int
880xfsaild_start(
881 struct xfs_ail *ailp)
882{
883 ailp->xa_target = 0;
884 ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
885 ailp->xa_mount->m_fsname);
886 if (IS_ERR(ailp->xa_task))
887 return -PTR_ERR(ailp->xa_task);
888 return 0;
889}
890
891void
892xfsaild_stop(
893 struct xfs_ail *ailp)
894{
895 kthread_stop(ailp->xa_task);
896}
897
898
899/* Catch misguided souls that try to use this interface on XFS */ 819/* Catch misguided souls that try to use this interface on XFS */
900STATIC struct inode * 820STATIC struct inode *
901xfs_fs_alloc_inode( 821xfs_fs_alloc_inode(
@@ -1089,7 +1009,7 @@ xfs_fs_write_inode(
1089 error = 0; 1009 error = 0;
1090 goto out_unlock; 1010 goto out_unlock;
1091 } 1011 }
1092 error = xfs_iflush(ip, 0); 1012 error = xfs_iflush(ip, SYNC_TRYLOCK);
1093 } 1013 }
1094 1014
1095 out_unlock: 1015 out_unlock:
@@ -1202,22 +1122,12 @@ xfs_fs_sync_fs(
1202 return -error; 1122 return -error;
1203 1123
1204 if (laptop_mode) { 1124 if (laptop_mode) {
1205 int prev_sync_seq = mp->m_sync_seq;
1206
1207 /* 1125 /*
1208 * The disk must be active because we're syncing. 1126 * The disk must be active because we're syncing.
1209 * We schedule xfssyncd now (now that the disk is 1127 * We schedule xfssyncd now (now that the disk is
1210 * active) instead of later (when it might not be). 1128 * active) instead of later (when it might not be).
1211 */ 1129 */
1212 wake_up_process(mp->m_sync_task); 1130 flush_delayed_work_sync(&mp->m_sync_work);
1213 /*
1214 * We have to wait for the sync iteration to complete.
1215 * If we don't, the disk activity caused by the sync
1216 * will come after the sync is completed, and that
1217 * triggers another sync from laptop mode.
1218 */
1219 wait_event(mp->m_wait_single_sync_task,
1220 mp->m_sync_seq != prev_sync_seq);
1221 } 1131 }
1222 1132
1223 return 0; 1133 return 0;
@@ -1345,8 +1255,8 @@ xfs_fs_remount(
1345 * options that we can't actually change. 1255 * options that we can't actually change.
1346 */ 1256 */
1347#if 0 1257#if 0
1348 printk(KERN_INFO 1258 xfs_info(mp,
1349 "XFS: mount option \"%s\" not supported for remount\n", p); 1259 "mount option \"%s\" not supported for remount\n", p);
1350 return -EINVAL; 1260 return -EINVAL;
1351#else 1261#else
1352 break; 1262 break;
@@ -1367,8 +1277,7 @@ xfs_fs_remount(
1367 if (mp->m_update_flags) { 1277 if (mp->m_update_flags) {
1368 error = xfs_mount_log_sb(mp, mp->m_update_flags); 1278 error = xfs_mount_log_sb(mp, mp->m_update_flags);
1369 if (error) { 1279 if (error) {
1370 cmn_err(CE_WARN, 1280 xfs_warn(mp, "failed to write sb changes");
1371 "XFS: failed to write sb changes");
1372 return error; 1281 return error;
1373 } 1282 }
1374 mp->m_update_flags = 0; 1283 mp->m_update_flags = 0;
@@ -1452,15 +1361,15 @@ xfs_finish_flags(
1452 mp->m_logbsize = mp->m_sb.sb_logsunit; 1361 mp->m_logbsize = mp->m_sb.sb_logsunit;
1453 } else if (mp->m_logbsize > 0 && 1362 } else if (mp->m_logbsize > 0 &&
1454 mp->m_logbsize < mp->m_sb.sb_logsunit) { 1363 mp->m_logbsize < mp->m_sb.sb_logsunit) {
1455 cmn_err(CE_WARN, 1364 xfs_warn(mp,
1456 "XFS: logbuf size must be greater than or equal to log stripe size"); 1365 "logbuf size must be greater than or equal to log stripe size");
1457 return XFS_ERROR(EINVAL); 1366 return XFS_ERROR(EINVAL);
1458 } 1367 }
1459 } else { 1368 } else {
1460 /* Fail a mount if the logbuf is larger than 32K */ 1369 /* Fail a mount if the logbuf is larger than 32K */
1461 if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) { 1370 if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
1462 cmn_err(CE_WARN, 1371 xfs_warn(mp,
1463 "XFS: logbuf size for version 1 logs must be 16K or 32K"); 1372 "logbuf size for version 1 logs must be 16K or 32K");
1464 return XFS_ERROR(EINVAL); 1373 return XFS_ERROR(EINVAL);
1465 } 1374 }
1466 } 1375 }
@@ -1477,8 +1386,8 @@ xfs_finish_flags(
1477 * prohibit r/w mounts of read-only filesystems 1386 * prohibit r/w mounts of read-only filesystems
1478 */ 1387 */
1479 if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) { 1388 if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
1480 cmn_err(CE_WARN, 1389 xfs_warn(mp,
1481 "XFS: cannot mount a read-only filesystem as read-write"); 1390 "cannot mount a read-only filesystem as read-write");
1482 return XFS_ERROR(EROFS); 1391 return XFS_ERROR(EROFS);
1483 } 1392 }
1484 1393
@@ -1502,9 +1411,6 @@ xfs_fs_fill_super(
1502 spin_lock_init(&mp->m_sb_lock); 1411 spin_lock_init(&mp->m_sb_lock);
1503 mutex_init(&mp->m_growlock); 1412 mutex_init(&mp->m_growlock);
1504 atomic_set(&mp->m_active_trans, 0); 1413 atomic_set(&mp->m_active_trans, 0);
1505 INIT_LIST_HEAD(&mp->m_sync_list);
1506 spin_lock_init(&mp->m_sync_lock);
1507 init_waitqueue_head(&mp->m_wait_single_sync_task);
1508 1414
1509 mp->m_super = sb; 1415 mp->m_super = sb;
1510 sb->s_fs_info = mp; 1416 sb->s_fs_info = mp;
@@ -1551,10 +1457,14 @@ xfs_fs_fill_super(
1551 if (error) 1457 if (error)
1552 goto out_free_sb; 1458 goto out_free_sb;
1553 1459
1554 error = xfs_mountfs(mp); 1460 /*
1555 if (error) 1461 * we must configure the block size in the superblock before we run the
1556 goto out_filestream_unmount; 1462 * full mount process as the mount process can lookup and cache inodes.
1557 1463 * For the same reason we must also initialise the syncd and register
1464 * the inode cache shrinker so that inodes can be reclaimed during
1465 * operations like a quotacheck that iterate all inodes in the
1466 * filesystem.
1467 */
1558 sb->s_magic = XFS_SB_MAGIC; 1468 sb->s_magic = XFS_SB_MAGIC;
1559 sb->s_blocksize = mp->m_sb.sb_blocksize; 1469 sb->s_blocksize = mp->m_sb.sb_blocksize;
1560 sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; 1470 sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
@@ -1562,6 +1472,16 @@ xfs_fs_fill_super(
1562 sb->s_time_gran = 1; 1472 sb->s_time_gran = 1;
1563 set_posix_acl_flag(sb); 1473 set_posix_acl_flag(sb);
1564 1474
1475 error = xfs_syncd_init(mp);
1476 if (error)
1477 goto out_filestream_unmount;
1478
1479 xfs_inode_shrinker_register(mp);
1480
1481 error = xfs_mountfs(mp);
1482 if (error)
1483 goto out_syncd_stop;
1484
1565 root = igrab(VFS_I(mp->m_rootip)); 1485 root = igrab(VFS_I(mp->m_rootip));
1566 if (!root) { 1486 if (!root) {
1567 error = ENOENT; 1487 error = ENOENT;
@@ -1577,14 +1497,11 @@ xfs_fs_fill_super(
1577 goto fail_vnrele; 1497 goto fail_vnrele;
1578 } 1498 }
1579 1499
1580 error = xfs_syncd_init(mp);
1581 if (error)
1582 goto fail_vnrele;
1583
1584 xfs_inode_shrinker_register(mp);
1585
1586 return 0; 1500 return 0;
1587 1501
1502 out_syncd_stop:
1503 xfs_inode_shrinker_unregister(mp);
1504 xfs_syncd_stop(mp);
1588 out_filestream_unmount: 1505 out_filestream_unmount:
1589 xfs_filestream_unmount(mp); 1506 xfs_filestream_unmount(mp);
1590 out_free_sb: 1507 out_free_sb:
@@ -1608,6 +1525,9 @@ xfs_fs_fill_super(
1608 } 1525 }
1609 1526
1610 fail_unmount: 1527 fail_unmount:
1528 xfs_inode_shrinker_unregister(mp);
1529 xfs_syncd_stop(mp);
1530
1611 /* 1531 /*
1612 * Blow away any referenced inode in the filestreams cache. 1532 * Blow away any referenced inode in the filestreams cache.
1613 * This can and will cause log traffic as inodes go inactive 1533 * This can and will cause log traffic as inodes go inactive
@@ -1797,6 +1717,38 @@ xfs_destroy_zones(void)
1797} 1717}
1798 1718
1799STATIC int __init 1719STATIC int __init
1720xfs_init_workqueues(void)
1721{
1722 /*
 1723 * max_active is set to 8 to give enough concurrency to allow
1724 * multiple work operations on each CPU to run. This allows multiple
1725 * filesystems to be running sync work concurrently, and scales with
1726 * the number of CPUs in the system.
1727 */
1728 xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8);
1729 if (!xfs_syncd_wq)
1730 goto out;
1731
1732 xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8);
1733 if (!xfs_ail_wq)
1734 goto out_destroy_syncd;
1735
1736 return 0;
1737
1738out_destroy_syncd:
1739 destroy_workqueue(xfs_syncd_wq);
1740out:
1741 return -ENOMEM;
1742}
1743
1744STATIC void
1745xfs_destroy_workqueues(void)
1746{
1747 destroy_workqueue(xfs_ail_wq);
1748 destroy_workqueue(xfs_syncd_wq);
1749}
1750
1751STATIC int __init
1800init_xfs_fs(void) 1752init_xfs_fs(void)
1801{ 1753{
1802 int error; 1754 int error;
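
xfs_init_workqueues() above follows the standard kernel error-unwinding shape: each allocation that fails jumps to a label that tears down only what was already set up, in reverse order, and xfs_destroy_workqueues() repeats that order for the normal teardown. A runnable skeleton of the pattern, with malloc/free standing in for alloc_workqueue()/destroy_workqueue() (labels mirror the hunk; everything else is illustrative):

#include <stdlib.h>

static void *syncd_wq, *ail_wq;

static int init_workqueues(void)
{
	syncd_wq = malloc(64);		/* stands in for alloc_workqueue() */
	if (!syncd_wq)
		goto out;

	ail_wq = malloc(64);
	if (!ail_wq)
		goto out_destroy_syncd;

	return 0;

out_destroy_syncd:			/* unwind in reverse order */
	free(syncd_wq);
out:
	return -1;			/* the kernel code returns -ENOMEM */
}

static void destroy_workqueues(void)
{
	free(ail_wq);
	free(syncd_wq);
}

int main(void)
{
	if (init_workqueues() == 0)
		destroy_workqueues();
	return 0;
}
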
@@ -1811,10 +1763,14 @@ init_xfs_fs(void)
1811 if (error) 1763 if (error)
1812 goto out; 1764 goto out;
1813 1765
1814 error = xfs_mru_cache_init(); 1766 error = xfs_init_workqueues();
1815 if (error) 1767 if (error)
1816 goto out_destroy_zones; 1768 goto out_destroy_zones;
1817 1769
1770 error = xfs_mru_cache_init();
1771 if (error)
1772 goto out_destroy_wq;
1773
1818 error = xfs_filestream_init(); 1774 error = xfs_filestream_init();
1819 if (error) 1775 if (error)
1820 goto out_mru_cache_uninit; 1776 goto out_mru_cache_uninit;
@@ -1831,6 +1787,10 @@ init_xfs_fs(void)
1831 if (error) 1787 if (error)
1832 goto out_cleanup_procfs; 1788 goto out_cleanup_procfs;
1833 1789
1790 error = xfs_init_workqueues();
1791 if (error)
1792 goto out_sysctl_unregister;
1793
1834 vfs_initquota(); 1794 vfs_initquota();
1835 1795
1836 error = register_filesystem(&xfs_fs_type); 1796 error = register_filesystem(&xfs_fs_type);
@@ -1848,6 +1808,8 @@ init_xfs_fs(void)
1848 xfs_filestream_uninit(); 1808 xfs_filestream_uninit();
1849 out_mru_cache_uninit: 1809 out_mru_cache_uninit:
1850 xfs_mru_cache_uninit(); 1810 xfs_mru_cache_uninit();
1811 out_destroy_wq:
1812 xfs_destroy_workqueues();
1851 out_destroy_zones: 1813 out_destroy_zones:
1852 xfs_destroy_zones(); 1814 xfs_destroy_zones();
1853 out: 1815 out:
@@ -1864,6 +1826,7 @@ exit_xfs_fs(void)
1864 xfs_buf_terminate(); 1826 xfs_buf_terminate();
1865 xfs_filestream_uninit(); 1827 xfs_filestream_uninit();
1866 xfs_mru_cache_uninit(); 1828 xfs_mru_cache_uninit();
1829 xfs_destroy_workqueues();
1867 xfs_destroy_zones(); 1830 xfs_destroy_zones();
1868} 1831}
1869 1832
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index e22f0057d21f..e4f9c1b0836c 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -22,6 +22,7 @@
22#include "xfs_log.h" 22#include "xfs_log.h"
23#include "xfs_inum.h" 23#include "xfs_inum.h"
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_trans_priv.h"
25#include "xfs_sb.h" 26#include "xfs_sb.h"
26#include "xfs_ag.h" 27#include "xfs_ag.h"
27#include "xfs_mount.h" 28#include "xfs_mount.h"
@@ -39,6 +40,8 @@
39#include <linux/kthread.h> 40#include <linux/kthread.h>
40#include <linux/freezer.h> 41#include <linux/freezer.h>
41 42
43struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */
44
42/* 45/*
43 * The inode lookup is done in batches to keep the amount of lock traffic and 46 * The inode lookup is done in batches to keep the amount of lock traffic and
44 * radix tree lookups to a minimum. The batch size is a trade off between 47 * radix tree lookups to a minimum. The batch size is a trade off between
@@ -401,7 +404,7 @@ xfs_quiesce_fs(
401/* 404/*
402 * Second stage of a quiesce. The data is already synced, now we have to take 405 * Second stage of a quiesce. The data is already synced, now we have to take
403 * care of the metadata. New transactions are already blocked, so we need to 406 * care of the metadata. New transactions are already blocked, so we need to
404 * wait for any remaining transactions to drain out before proceding. 407 * wait for any remaining transactions to drain out before proceeding.
405 */ 408 */
406void 409void
407xfs_quiesce_attr( 410xfs_quiesce_attr(
@@ -425,69 +428,18 @@ xfs_quiesce_attr(
425 /* Push the superblock and write an unmount record */ 428 /* Push the superblock and write an unmount record */
426 error = xfs_log_sbcount(mp, 1); 429 error = xfs_log_sbcount(mp, 1);
427 if (error) 430 if (error)
428 xfs_fs_cmn_err(CE_WARN, mp, 431 xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
429 "xfs_attr_quiesce: failed to log sb changes. "
430 "Frozen image may not be consistent."); 432 "Frozen image may not be consistent.");
431 xfs_log_unmount_write(mp); 433 xfs_log_unmount_write(mp);
432 xfs_unmountfs_writesb(mp); 434 xfs_unmountfs_writesb(mp);
433} 435}
434 436
435/* 437static void
436 * Enqueue a work item to be picked up by the vfs xfssyncd thread. 438xfs_syncd_queue_sync(
437 * Doing this has two advantages: 439 struct xfs_mount *mp)
438 * - It saves on stack space, which is tight in certain situations
439 * - It can be used (with care) as a mechanism to avoid deadlocks.
440 * Flushing while allocating in a full filesystem requires both.
441 */
442STATIC void
443xfs_syncd_queue_work(
444 struct xfs_mount *mp,
445 void *data,
446 void (*syncer)(struct xfs_mount *, void *),
447 struct completion *completion)
448{
449 struct xfs_sync_work *work;
450
451 work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP);
452 INIT_LIST_HEAD(&work->w_list);
453 work->w_syncer = syncer;
454 work->w_data = data;
455 work->w_mount = mp;
456 work->w_completion = completion;
457 spin_lock(&mp->m_sync_lock);
458 list_add_tail(&work->w_list, &mp->m_sync_list);
459 spin_unlock(&mp->m_sync_lock);
460 wake_up_process(mp->m_sync_task);
461}
462
463/*
464 * Flush delayed allocate data, attempting to free up reserved space
465 * from existing allocations. At this point a new allocation attempt
466 * has failed with ENOSPC and we are in the process of scratching our
467 * heads, looking about for more room...
468 */
469STATIC void
470xfs_flush_inodes_work(
471 struct xfs_mount *mp,
472 void *arg)
473{
474 struct inode *inode = arg;
475 xfs_sync_data(mp, SYNC_TRYLOCK);
476 xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
477 iput(inode);
478}
479
480void
481xfs_flush_inodes(
482 xfs_inode_t *ip)
483{ 440{
484 struct inode *inode = VFS_I(ip); 441 queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work,
485 DECLARE_COMPLETION_ONSTACK(completion); 442 msecs_to_jiffies(xfs_syncd_centisecs * 10));
486
487 igrab(inode);
488 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion);
489 wait_for_completion(&completion);
490 xfs_log_force(ip->i_mount, XFS_LOG_SYNC);
491} 443}
492 444
493/* 445/*
@@ -497,9 +449,10 @@ xfs_flush_inodes(
497 */ 449 */
498STATIC void 450STATIC void
499xfs_sync_worker( 451xfs_sync_worker(
500 struct xfs_mount *mp, 452 struct work_struct *work)
501 void *unused)
502{ 453{
454 struct xfs_mount *mp = container_of(to_delayed_work(work),
455 struct xfs_mount, m_sync_work);
503 int error; 456 int error;
504 457
505 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { 458 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
@@ -509,73 +462,106 @@ xfs_sync_worker(
509 error = xfs_fs_log_dummy(mp); 462 error = xfs_fs_log_dummy(mp);
510 else 463 else
511 xfs_log_force(mp, 0); 464 xfs_log_force(mp, 0);
512 xfs_reclaim_inodes(mp, 0);
513 error = xfs_qm_sync(mp, SYNC_TRYLOCK); 465 error = xfs_qm_sync(mp, SYNC_TRYLOCK);
466
467 /* start pushing all the metadata that is currently dirty */
468 xfs_ail_push_all(mp->m_ail);
514 } 469 }
515 mp->m_sync_seq++; 470
516 wake_up(&mp->m_wait_single_sync_task); 471 /* queue us up again */
472 xfs_syncd_queue_sync(mp);
517} 473}
518 474
519STATIC int 475/*
520xfssyncd( 476 * Queue a new inode reclaim pass if there are reclaimable inodes and there
521 void *arg) 477 * isn't a reclaim pass already in progress. By default it runs every 5s based
 478 * on the xfs syncd work default of 30s. Perhaps this should have its own
479 * tunable, but that can be done if this method proves to be ineffective or too
480 * aggressive.
481 */
482static void
483xfs_syncd_queue_reclaim(
484 struct xfs_mount *mp)
522{ 485{
523 struct xfs_mount *mp = arg;
524 long timeleft;
525 xfs_sync_work_t *work, *n;
526 LIST_HEAD (tmp);
527
528 set_freezable();
529 timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
530 for (;;) {
531 if (list_empty(&mp->m_sync_list))
-532			timeleft = schedule_timeout_interruptible(timeleft);
-533			/* swsusp */
-534			try_to_freeze();
-535			if (kthread_should_stop() && list_empty(&mp->m_sync_list))
-536				break;
-537
-538			spin_lock(&mp->m_sync_lock);
-539			/*
-540			 * We can get woken by laptop mode, to do a sync -
-541			 * that's the (only!) case where the list would be
-542			 * empty with time remaining.
-543			 */
-544			if (!timeleft || list_empty(&mp->m_sync_list)) {
-545				if (!timeleft)
-546					timeleft = xfs_syncd_centisecs *
-547							msecs_to_jiffies(10);
-548				INIT_LIST_HEAD(&mp->m_sync_work.w_list);
-549				list_add_tail(&mp->m_sync_work.w_list,
-550						&mp->m_sync_list);
-551			}
-552			list_splice_init(&mp->m_sync_list, &tmp);
-553			spin_unlock(&mp->m_sync_lock);
-554
-555			list_for_each_entry_safe(work, n, &tmp, w_list) {
-556				(*work->w_syncer)(mp, work->w_data);
-557				list_del(&work->w_list);
-558				if (work == &mp->m_sync_work)
-559					continue;
-560				if (work->w_completion)
-561					complete(work->w_completion);
-562				kmem_free(work);
-563			}
-564		}
-565
-566		return 0;
+486
+487	/*
+488	 * We can have inodes enter reclaim after we've shut down the syncd
+489	 * workqueue during unmount, so don't allow reclaim work to be queued
+490	 * during unmount.
+491	 */
+492	if (!(mp->m_super->s_flags & MS_ACTIVE))
+493		return;
+494
+495	rcu_read_lock();
+496	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
+497		queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work,
+498			msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
+499	}
+500	rcu_read_unlock();
+501}
+502
+503/*
+504 * This is a fast pass over the inode cache to try to get reclaim moving on as
+505 * many inodes as possible in a short period of time. It kicks itself every few
+506 * seconds, as well as being kicked by the inode cache shrinker when memory
+507 * goes low. It scans as quickly as possible avoiding locked inodes or those
+508 * already being flushed, and once done schedules a future pass.
+509 */
+510STATIC void
+511xfs_reclaim_worker(
+512	struct work_struct *work)
+513{
+514	struct xfs_mount *mp = container_of(to_delayed_work(work),
+515					struct xfs_mount, m_reclaim_work);
+516
+517	xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
+518	xfs_syncd_queue_reclaim(mp);
+519}
+520
+521/*
+522 * Flush delayed allocate data, attempting to free up reserved space
+523 * from existing allocations. At this point a new allocation attempt
+524 * has failed with ENOSPC and we are in the process of scratching our
+525 * heads, looking about for more room.
+526 *
+527 * Queue a new data flush if there isn't one already in progress and
+528 * wait for completion of the flush. This means that we only ever have one
+529 * inode flush in progress no matter how many ENOSPC events are occurring and
+530 * so will prevent the system from bogging down due to every concurrent
+531 * ENOSPC event scanning all the active inodes in the system for writeback.
+532 */
+533void
+534xfs_flush_inodes(
+535	struct xfs_inode *ip)
+536{
+537	struct xfs_mount *mp = ip->i_mount;
+538
+539	queue_work(xfs_syncd_wq, &mp->m_flush_work);
+540	flush_work_sync(&mp->m_flush_work);
+541}
+542
+543STATIC void
+544xfs_flush_worker(
+545	struct work_struct *work)
+546{
+547	struct xfs_mount *mp = container_of(work,
+548					struct xfs_mount, m_flush_work);
+549
+550	xfs_sync_data(mp, SYNC_TRYLOCK);
+551	xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
 567}						552}
 568						553
 569int						554int
 570xfs_syncd_init(				555xfs_syncd_init(
 571	struct xfs_mount *mp)			556	struct xfs_mount *mp)
 572{						557{
-573	mp->m_sync_work.w_syncer = xfs_sync_worker;
-574	mp->m_sync_work.w_mount = mp;
-575	mp->m_sync_work.w_completion = NULL;
-576	mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname);
-577	if (IS_ERR(mp->m_sync_task))
-578		return -PTR_ERR(mp->m_sync_task);
+558	INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
+559	INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
+560	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
+561
+562	xfs_syncd_queue_sync(mp);
+563	xfs_syncd_queue_reclaim(mp);
+564
 579	return 0;				565	return 0;
 580}						566}
 581						567
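The hunks above replace the per-mount xfssyncd kthread with work items on the shared xfs_syncd_wq workqueue. Two patterns carry the conversion: a delayed work that re-queues itself for periodic behaviour, and a plain work item whose "only queued once" semantics deduplicate concurrent ENOSPC flushes. A minimal sketch of both, under hypothetical my_* names (the period and guard flag stand in for xfs_syncd_centisecs and the MS_ACTIVE check):

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/workqueue.h>

struct my_mount {
	struct workqueue_struct	*wq;
	struct delayed_work	sync_work;	/* periodic, re-arms itself */
	struct work_struct	flush_work;	/* one-shot, deduplicated */
	bool			active;		/* guards re-arming, cf. MS_ACTIVE */
};

/* Periodic worker: do one pass, then queue the next one. */
static void my_sync_worker(struct work_struct *work)
{
	struct my_mount *m = container_of(to_delayed_work(work),
					  struct my_mount, sync_work);

	/* ... one sync pass over m ... */
	if (m->active)
		queue_delayed_work(m->wq, &m->sync_work, 30 * HZ);
}

static void my_flush_worker(struct work_struct *work)
{
	/* ... flush dirty data for the mount ... */
}

/*
 * ENOSPC path: queue the flush and wait for it.  A work item that is
 * already pending is not queued a second time, so any number of
 * concurrent callers end up sharing one flush pass.
 */
static void my_flush_and_wait(struct my_mount *m)
{
	queue_work(m->wq, &m->flush_work);
	flush_work_sync(&m->flush_work);
}

static int my_start(struct my_mount *m)
{
	m->wq = alloc_workqueue("my_syncd", WQ_MEM_RECLAIM, 0);
	if (!m->wq)
		return -ENOMEM;

	INIT_WORK(&m->flush_work, my_flush_worker);
	INIT_DELAYED_WORK(&m->sync_work, my_sync_worker);
	m->active = true;
	queue_delayed_work(m->wq, &m->sync_work, 30 * HZ);
	return 0;
}

static void my_stop(struct my_mount *m)
{
	/* stop re-arming first, then cancel and wait */
	m->active = false;
	cancel_delayed_work_sync(&m->sync_work);
	cancel_work_sync(&m->flush_work);
	destroy_workqueue(m->wq);
}

A self-rearming work item needs the explicit guard before teardown; cancel_delayed_work_sync() waits for a running instance, but without the guard the instance could re-queue itself just before being cancelled, which is exactly what the MS_ACTIVE test in xfs_syncd_queue_reclaim() prevents during unmount.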
@@ -583,7 +569,9 @@ void
583xfs_syncd_stop( 569xfs_syncd_stop(
584 struct xfs_mount *mp) 570 struct xfs_mount *mp)
585{ 571{
586 kthread_stop(mp->m_sync_task); 572 cancel_delayed_work_sync(&mp->m_sync_work);
573 cancel_delayed_work_sync(&mp->m_reclaim_work);
574 cancel_work_sync(&mp->m_flush_work);
587} 575}
588 576
589void 577void
@@ -602,6 +590,10 @@ __xfs_inode_set_reclaim_tag(
602 XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), 590 XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
603 XFS_ICI_RECLAIM_TAG); 591 XFS_ICI_RECLAIM_TAG);
604 spin_unlock(&ip->i_mount->m_perag_lock); 592 spin_unlock(&ip->i_mount->m_perag_lock);
593
594 /* schedule periodic background inode reclaim */
595 xfs_syncd_queue_reclaim(ip->i_mount);
596
605 trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno, 597 trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
606 -1, _RET_IP_); 598 -1, _RET_IP_);
607 } 599 }
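This hunk ties reclaim scheduling to the tag transition: whenever an inode is the first in its AG to be tagged for reclaim, the per-AG radix tree gets the tag and background reclaim is kicked, so the worker never has to walk untagged AGs. The radix-tree tag idiom, sketched with hypothetical names (the real per-AG trees carry their own locking; radix_tree_tagged() is what makes the queueing test above cheap):

#include <linux/radix-tree.h>
#include <linux/types.h>

#define MY_RECLAIM_TAG	0

static RADIX_TREE(my_tree, GFP_ATOMIC);

/* An entry must be inserted before it can be tagged. */
static int my_track(unsigned long index, void *item)
{
	return radix_tree_insert(&my_tree, index, item);
}

/* Mark one entry as having reclaimable state. */
static void my_mark_reclaimable(unsigned long index)
{
	radix_tree_tag_set(&my_tree, index, MY_RECLAIM_TAG);
}

/*
 * "Is anything tagged at all?" consults the summary tag bits at the
 * tree root - no walk of the entries - so it is cheap enough to call
 * on every tag transition.
 */
static bool my_reclaim_pending(void)
{
	return radix_tree_tagged(&my_tree, MY_RECLAIM_TAG);
}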
@@ -762,8 +754,10 @@ xfs_reclaim_inode(
762 struct xfs_perag *pag, 754 struct xfs_perag *pag,
763 int sync_mode) 755 int sync_mode)
764{ 756{
765 int error = 0; 757 int error;
766 758
759restart:
760 error = 0;
767 xfs_ilock(ip, XFS_ILOCK_EXCL); 761 xfs_ilock(ip, XFS_ILOCK_EXCL);
768 if (!xfs_iflock_nowait(ip)) { 762 if (!xfs_iflock_nowait(ip)) {
769 if (!(sync_mode & SYNC_WAIT)) 763 if (!(sync_mode & SYNC_WAIT))
@@ -789,9 +783,31 @@ xfs_reclaim_inode(
789 if (xfs_inode_clean(ip)) 783 if (xfs_inode_clean(ip))
790 goto reclaim; 784 goto reclaim;
791 785
-792	/* Now we have an inode that needs flushing */
-793	error = xfs_iflush(ip, sync_mode);
+786	/*
+787	 * Now we have an inode that needs flushing.
788 *
789 * We do a nonblocking flush here even if we are doing a SYNC_WAIT
790 * reclaim as we can deadlock with inode cluster removal.
791 * xfs_ifree_cluster() can lock the inode buffer before it locks the
792 * ip->i_lock, and we are doing the exact opposite here. As a result,
793 * doing a blocking xfs_itobp() to get the cluster buffer will result
794 * in an ABBA deadlock with xfs_ifree_cluster().
795 *
796 * As xfs_ifree_cluster() must gather all inodes that are active in the
797 * cache to mark them stale, if we hit this case we don't actually want
798 * to do IO here - we want the inode marked stale so we can simply
799 * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush,
800 * just unlock the inode, back off and try again. Hopefully the next
801 * pass through will see the stale flag set on the inode.
802 */
803 error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
794 if (sync_mode & SYNC_WAIT) { 804 if (sync_mode & SYNC_WAIT) {
805 if (error == EAGAIN) {
806 xfs_iunlock(ip, XFS_ILOCK_EXCL);
807 /* backoff longer than in xfs_ifree_cluster */
808 delay(2);
809 goto restart;
810 }
795 xfs_iflock(ip); 811 xfs_iflock(ip);
796 goto reclaim; 812 goto reclaim;
797 } 813 }
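The comment block documents a classic ABBA inversion: reclaim holds ip->i_lock and wants the cluster buffer, while xfs_ifree_cluster() takes the buffer lock before the inode lock. Rather than reordering the locks, the patch makes the second acquisition a trylock and, on EAGAIN, drops everything, backs off, and restarts. The shape of that pattern reduced to two mutexes (hypothetical names; msleep() stands in for the XFS delay() helper used above):

#include <linux/mutex.h>
#include <linux/delay.h>

static DEFINE_MUTEX(lock_a);
static DEFINE_MUTEX(lock_b);

/*
 * Another code path takes lock_b then lock_a, so blocking on lock_b
 * while holding lock_a can deadlock.  Trylock the second lock; on
 * failure drop the first, back off briefly, and restart from the top.
 */
static void do_work_ab(void)
{
restart:
	mutex_lock(&lock_a);
	if (!mutex_trylock(&lock_b)) {
		mutex_unlock(&lock_a);
		msleep(2);	/* outlast the B-then-A holder */
		goto restart;
	}

	/* ... both locks held, safe against the B-then-A ordering ... */

	mutex_unlock(&lock_b);
	mutex_unlock(&lock_a);
}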
@@ -806,7 +822,7 @@ xfs_reclaim_inode(
806 * pass on the error. 822 * pass on the error.
807 */ 823 */
808 if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) { 824 if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
809 xfs_fs_cmn_err(CE_WARN, ip->i_mount, 825 xfs_warn(ip->i_mount,
810 "inode 0x%llx background reclaim flush failed with %d", 826 "inode 0x%llx background reclaim flush failed with %d",
811 (long long)ip->i_ino, error); 827 (long long)ip->i_ino, error);
812 } 828 }
@@ -994,7 +1010,13 @@ xfs_reclaim_inodes(
994} 1010}
995 1011
996/* 1012/*
997 * Shrinker infrastructure. 1013 * Inode cache shrinker.
1014 *
1015 * When called we make sure that there is a background (fast) inode reclaim in
1016 * progress, while we throttle the speed of reclaim by doing synchronous
1017 * reclaim of inodes. That means if we come across dirty inodes, we wait for
1018 * them to be cleaned, which we hope will not be very long due to the
1019 * background walker having already kicked the IO off on those dirty inodes.
998 */ 1020 */
999static int 1021static int
1000xfs_reclaim_inode_shrink( 1022xfs_reclaim_inode_shrink(
@@ -1009,10 +1031,15 @@ xfs_reclaim_inode_shrink(
1009 1031
1010 mp = container_of(shrink, struct xfs_mount, m_inode_shrink); 1032 mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
1011 if (nr_to_scan) { 1033 if (nr_to_scan) {
1034 /* kick background reclaimer and push the AIL */
1035 xfs_syncd_queue_reclaim(mp);
1036 xfs_ail_push_all(mp->m_ail);
1037
1012 if (!(gfp_mask & __GFP_FS)) 1038 if (!(gfp_mask & __GFP_FS))
1013 return -1; 1039 return -1;
1014 1040
1015 xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan); 1041 xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT,
1042 &nr_to_scan);
1016 /* terminate if we don't exhaust the scan */ 1043 /* terminate if we don't exhaust the scan */
1017 if (nr_to_scan > 0) 1044 if (nr_to_scan > 0)
1018 return -1; 1045 return -1;
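Registering per-mount state with the VM follows the embed-and-container_of idiom: the shrinker lives inside the xfs_mount, and the callback recovers its mount from the shrinker pointer alone. The hunk also shows the etiquette of this shrinker API generation: kick asynchronous reclaim and the AIL first, return -1 when the allocation context (!__GFP_FS) forbids recursing into the filesystem, and only then do the throttling SYNC_WAIT scan. The recovery idiom, sketched with hypothetical names:

#include <linux/kernel.h>	/* container_of() */
#include <linux/mm.h>		/* struct shrinker */

struct my_mount {
	/* ... mount-wide state ... */
	struct shrinker	inode_shrink;	/* embedded, registered at mount time */
};

/*
 * The VM hands the callback only its struct shrinker; embedding it in
 * the mount lets container_of() recover the owning structure without
 * any global registry or lookup.
 */
static inline struct my_mount *
my_mount_from_shrinker(struct shrinker *shrink)
{
	return container_of(shrink, struct my_mount, inode_shrink);
}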
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 32ba6628290c..e3a6ad27415f 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -32,6 +32,8 @@ typedef struct xfs_sync_work {
32#define SYNC_WAIT 0x0001 /* wait for i/o to complete */ 32#define SYNC_WAIT 0x0001 /* wait for i/o to complete */
33#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ 33#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */
34 34
35extern struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */
36
35int xfs_syncd_init(struct xfs_mount *mp); 37int xfs_syncd_init(struct xfs_mount *mp);
36void xfs_syncd_stop(struct xfs_mount *mp); 38void xfs_syncd_stop(struct xfs_mount *mp);
37 39
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index ee3cee097e7e..ee2d2adaa438 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -37,7 +37,7 @@ xfs_stats_clear_proc_handler(
37 ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); 37 ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
38 38
39 if (!ret && write && *valp) { 39 if (!ret && write && *valp) {
40 printk("XFS Clearing xfsstats\n"); 40 xfs_notice(NULL, "Clearing xfsstats");
41 for_each_possible_cpu(c) { 41 for_each_possible_cpu(c) {
42 preempt_disable(); 42 preempt_disable();
43 /* save vn_active, it's a universal truth! */ 43 /* save vn_active, it's a universal truth! */
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index d22aa3103106..6fa214603819 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -544,9 +544,10 @@ xfs_qm_dqtobp(
544 /* 544 /*
545 * A simple sanity check in case we got a corrupted dquot... 545 * A simple sanity check in case we got a corrupted dquot...
546 */ 546 */
547 if (xfs_qm_dqcheck(ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES, 547 error = xfs_qm_dqcheck(mp, ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES,
548 flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN), 548 flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN),
549 "dqtobp")) { 549 "dqtobp");
550 if (error) {
550 if (!(flags & XFS_QMOPT_DQREPAIR)) { 551 if (!(flags & XFS_QMOPT_DQREPAIR)) {
551 xfs_trans_brelse(tp, bp); 552 xfs_trans_brelse(tp, bp);
552 return XFS_ERROR(EIO); 553 return XFS_ERROR(EIO);
@@ -599,7 +600,7 @@ xfs_qm_dqread(
599 600
600 /* 601 /*
601 * Reservation counters are defined as reservation plus current usage 602 * Reservation counters are defined as reservation plus current usage
602 * to avoid having to add everytime. 603 * to avoid having to add every time.
603 */ 604 */
604 dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount); 605 dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount);
605 dqp->q_res_icount = be64_to_cpu(ddqp->d_icount); 606 dqp->q_res_icount = be64_to_cpu(ddqp->d_icount);
@@ -827,7 +828,7 @@ xfs_qm_dqget(
827 if (xfs_do_dqerror) { 828 if (xfs_do_dqerror) {
828 if ((xfs_dqerror_target == mp->m_ddev_targp) && 829 if ((xfs_dqerror_target == mp->m_ddev_targp) &&
829 (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) { 830 (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) {
830 cmn_err(CE_DEBUG, "Returning error in dqget"); 831 xfs_debug(mp, "Returning error in dqget");
831 return (EIO); 832 return (EIO);
832 } 833 }
833 } 834 }
@@ -1207,8 +1208,9 @@ xfs_qm_dqflush(
1207 /* 1208 /*
1208 * A simple sanity check in case we got a corrupted dquot.. 1209 * A simple sanity check in case we got a corrupted dquot..
1209 */ 1210 */
1210 if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id), 0, 1211 error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0,
1211 XFS_QMOPT_DOWARN, "dqflush (incore copy)")) { 1212 XFS_QMOPT_DOWARN, "dqflush (incore copy)");
1213 if (error) {
1212 xfs_buf_relse(bp); 1214 xfs_buf_relse(bp);
1213 xfs_dqfunlock(dqp); 1215 xfs_dqfunlock(dqp);
1214 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 1216 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
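Both dquot call sites convert xfs_qm_dqcheck() from a truth-value test into a stored errno, the kernel's preferred shape: the caller keeps the specific error for propagation instead of collapsing it to yes/no at the call site. The refactor in miniature, on a hypothetical record type (note XFS historically passes positive errnos internally, while core kernel code returns them negated as here):

#include <linux/errno.h>
#include <linux/types.h>

#define RECORD_MAGIC	0x52454331

struct record {
	u32	magic;
	u32	len;
};

/* After the conversion: 0 on success, a distinct errno per failure. */
static int record_check(const struct record *r, u32 maxlen)
{
	if (r->magic != RECORD_MAGIC)
		return -EINVAL;
	if (r->len > maxlen)
		return -EUCLEAN;	/* corrupt on-disk state */
	return 0;
}

/* Caller pattern: store the error, test it, propagate it. */
static int record_use(const struct record *r)
{
	int error;

	error = record_check(r, 4096);
	if (error)
		return error;
	/* ... use r ... */
	return 0;
}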
@@ -1391,8 +1393,8 @@ xfs_qm_dqpurge(
1391 */ 1393 */
1392 error = xfs_qm_dqflush(dqp, SYNC_WAIT); 1394 error = xfs_qm_dqflush(dqp, SYNC_WAIT);
1393 if (error) 1395 if (error)
1394 xfs_fs_cmn_err(CE_WARN, mp, 1396 xfs_warn(mp, "%s: dquot %p flush failed",
1395 "xfs_qm_dqpurge: dquot %p flush failed", dqp); 1397 __func__, dqp);
1396 xfs_dqflock(dqp); 1398 xfs_dqflock(dqp);
1397 } 1399 }
1398 ASSERT(atomic_read(&dqp->q_pincount) == 0); 1400 ASSERT(atomic_read(&dqp->q_pincount) == 0);
@@ -1425,36 +1427,38 @@ xfs_qm_dqpurge(
1425void 1427void
1426xfs_qm_dqprint(xfs_dquot_t *dqp) 1428xfs_qm_dqprint(xfs_dquot_t *dqp)
1427{ 1429{
1428 cmn_err(CE_DEBUG, "-----------KERNEL DQUOT----------------"); 1430 struct xfs_mount *mp = dqp->q_mount;
1429 cmn_err(CE_DEBUG, "---- dquotID = %d", 1431
1432 xfs_debug(mp, "-----------KERNEL DQUOT----------------");
1433 xfs_debug(mp, "---- dquotID = %d",
1430 (int)be32_to_cpu(dqp->q_core.d_id)); 1434 (int)be32_to_cpu(dqp->q_core.d_id));
1431 cmn_err(CE_DEBUG, "---- type = %s", DQFLAGTO_TYPESTR(dqp)); 1435 xfs_debug(mp, "---- type = %s", DQFLAGTO_TYPESTR(dqp));
1432 cmn_err(CE_DEBUG, "---- fs = 0x%p", dqp->q_mount); 1436 xfs_debug(mp, "---- fs = 0x%p", dqp->q_mount);
1433 cmn_err(CE_DEBUG, "---- blkno = 0x%x", (int) dqp->q_blkno); 1437 xfs_debug(mp, "---- blkno = 0x%x", (int) dqp->q_blkno);
1434 cmn_err(CE_DEBUG, "---- boffset = 0x%x", (int) dqp->q_bufoffset); 1438 xfs_debug(mp, "---- boffset = 0x%x", (int) dqp->q_bufoffset);
1435 cmn_err(CE_DEBUG, "---- blkhlimit = %Lu (0x%x)", 1439 xfs_debug(mp, "---- blkhlimit = %Lu (0x%x)",
1436 be64_to_cpu(dqp->q_core.d_blk_hardlimit), 1440 be64_to_cpu(dqp->q_core.d_blk_hardlimit),
1437 (int)be64_to_cpu(dqp->q_core.d_blk_hardlimit)); 1441 (int)be64_to_cpu(dqp->q_core.d_blk_hardlimit));
1438 cmn_err(CE_DEBUG, "---- blkslimit = %Lu (0x%x)", 1442 xfs_debug(mp, "---- blkslimit = %Lu (0x%x)",
1439 be64_to_cpu(dqp->q_core.d_blk_softlimit), 1443 be64_to_cpu(dqp->q_core.d_blk_softlimit),
1440 (int)be64_to_cpu(dqp->q_core.d_blk_softlimit)); 1444 (int)be64_to_cpu(dqp->q_core.d_blk_softlimit));
1441 cmn_err(CE_DEBUG, "---- inohlimit = %Lu (0x%x)", 1445 xfs_debug(mp, "---- inohlimit = %Lu (0x%x)",
1442 be64_to_cpu(dqp->q_core.d_ino_hardlimit), 1446 be64_to_cpu(dqp->q_core.d_ino_hardlimit),
1443 (int)be64_to_cpu(dqp->q_core.d_ino_hardlimit)); 1447 (int)be64_to_cpu(dqp->q_core.d_ino_hardlimit));
1444 cmn_err(CE_DEBUG, "---- inoslimit = %Lu (0x%x)", 1448 xfs_debug(mp, "---- inoslimit = %Lu (0x%x)",
1445 be64_to_cpu(dqp->q_core.d_ino_softlimit), 1449 be64_to_cpu(dqp->q_core.d_ino_softlimit),
1446 (int)be64_to_cpu(dqp->q_core.d_ino_softlimit)); 1450 (int)be64_to_cpu(dqp->q_core.d_ino_softlimit));
1447 cmn_err(CE_DEBUG, "---- bcount = %Lu (0x%x)", 1451 xfs_debug(mp, "---- bcount = %Lu (0x%x)",
1448 be64_to_cpu(dqp->q_core.d_bcount), 1452 be64_to_cpu(dqp->q_core.d_bcount),
1449 (int)be64_to_cpu(dqp->q_core.d_bcount)); 1453 (int)be64_to_cpu(dqp->q_core.d_bcount));
1450 cmn_err(CE_DEBUG, "---- icount = %Lu (0x%x)", 1454 xfs_debug(mp, "---- icount = %Lu (0x%x)",
1451 be64_to_cpu(dqp->q_core.d_icount), 1455 be64_to_cpu(dqp->q_core.d_icount),
1452 (int)be64_to_cpu(dqp->q_core.d_icount)); 1456 (int)be64_to_cpu(dqp->q_core.d_icount));
1453 cmn_err(CE_DEBUG, "---- btimer = %d", 1457 xfs_debug(mp, "---- btimer = %d",
1454 (int)be32_to_cpu(dqp->q_core.d_btimer)); 1458 (int)be32_to_cpu(dqp->q_core.d_btimer));
1455 cmn_err(CE_DEBUG, "---- itimer = %d", 1459 xfs_debug(mp, "---- itimer = %d",
1456 (int)be32_to_cpu(dqp->q_core.d_itimer)); 1460 (int)be32_to_cpu(dqp->q_core.d_itimer));
1457 cmn_err(CE_DEBUG, "---------------------------"); 1461 xfs_debug(mp, "---------------------------");
1458} 1462}
1459#endif 1463#endif
1460 1464
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 2a1f3dc10a02..9e0e2fa3f2c8 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -136,9 +136,8 @@ xfs_qm_dquot_logitem_push(
136 */ 136 */
137 error = xfs_qm_dqflush(dqp, 0); 137 error = xfs_qm_dqflush(dqp, 0);
138 if (error) 138 if (error)
139 xfs_fs_cmn_err(CE_WARN, dqp->q_mount, 139 xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p",
140 "xfs_qm_dquot_logitem_push: push error %d on dqp %p", 140 __func__, error, dqp);
141 error, dqp);
142 xfs_dqunlock(dqp); 141 xfs_dqunlock(dqp);
143} 142}
144 143
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 206a2815ced6..69228aa8605a 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -80,7 +80,7 @@ xfs_qm_dquot_list_print(
80 int i = 0; 80 int i = 0;
81 81
82 list_for_each_entry(dqp, &mp->m_quotainfo->qi_dqlist_lock, qi_mplist) { 82 list_for_each_entry(dqp, &mp->m_quotainfo->qi_dqlist_lock, qi_mplist) {
83 cmn_err(CE_DEBUG, " %d. \"%d (%s)\" " 83 xfs_debug(mp, " %d. \"%d (%s)\" "
84 "bcnt = %lld, icnt = %lld, refs = %d", 84 "bcnt = %lld, icnt = %lld, refs = %d",
85 i++, be32_to_cpu(dqp->q_core.d_id), 85 i++, be32_to_cpu(dqp->q_core.d_id),
86 DQFLAGTO_TYPESTR(dqp), 86 DQFLAGTO_TYPESTR(dqp),
@@ -205,7 +205,7 @@ xfs_qm_destroy(
205 list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) { 205 list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) {
206 xfs_dqlock(dqp); 206 xfs_dqlock(dqp);
207#ifdef QUOTADEBUG 207#ifdef QUOTADEBUG
208 cmn_err(CE_DEBUG, "FREELIST destroy 0x%p", dqp); 208 xfs_debug(dqp->q_mount, "FREELIST destroy 0x%p", dqp);
209#endif 209#endif
210 list_del_init(&dqp->q_freelist); 210 list_del_init(&dqp->q_freelist);
211 xfs_Gqm->qm_dqfrlist_cnt--; 211 xfs_Gqm->qm_dqfrlist_cnt--;
@@ -341,9 +341,7 @@ xfs_qm_mount_quotas(
341 * quotas immediately. 341 * quotas immediately.
342 */ 342 */
343 if (mp->m_sb.sb_rextents) { 343 if (mp->m_sb.sb_rextents) {
344 cmn_err(CE_NOTE, 344 xfs_notice(mp, "Cannot turn on quotas for realtime filesystem");
345 "Cannot turn on quotas for realtime filesystem %s",
346 mp->m_fsname);
347 mp->m_qflags = 0; 345 mp->m_qflags = 0;
348 goto write_changes; 346 goto write_changes;
349 } 347 }
@@ -402,14 +400,13 @@ xfs_qm_mount_quotas(
402 * off, but the on disk superblock doesn't know that ! 400 * off, but the on disk superblock doesn't know that !
403 */ 401 */
404 ASSERT(!(XFS_IS_QUOTA_RUNNING(mp))); 402 ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
405 xfs_fs_cmn_err(CE_ALERT, mp, 403 xfs_alert(mp, "%s: Superblock update failed!",
406 "XFS mount_quotas: Superblock update failed!"); 404 __func__);
407 } 405 }
408 } 406 }
409 407
410 if (error) { 408 if (error) {
411 xfs_fs_cmn_err(CE_WARN, mp, 409 xfs_warn(mp, "Failed to initialize disk quotas.");
412 "Failed to initialize disk quotas.");
413 return; 410 return;
414 } 411 }
415 412
@@ -464,12 +461,10 @@ xfs_qm_dqflush_all(
464 struct xfs_quotainfo *q = mp->m_quotainfo; 461 struct xfs_quotainfo *q = mp->m_quotainfo;
465 int recl; 462 int recl;
466 struct xfs_dquot *dqp; 463 struct xfs_dquot *dqp;
467 int niters;
468 int error; 464 int error;
469 465
470 if (!q) 466 if (!q)
471 return 0; 467 return 0;
472 niters = 0;
473again: 468again:
474 mutex_lock(&q->qi_dqlist_lock); 469 mutex_lock(&q->qi_dqlist_lock);
475 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { 470 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
@@ -1230,13 +1225,6 @@ xfs_qm_qino_alloc(
1230 } 1225 }
1231 1226
1232 /* 1227 /*
1233 * Keep an extra reference to this quota inode. This inode is
1234 * locked exclusively and joined to the transaction already.
1235 */
1236 ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL));
1237 IHOLD(*ip);
1238
1239 /*
1240 * Make the changes in the superblock, and log those too. 1228 * Make the changes in the superblock, and log those too.
1241 * sbfields arg may contain fields other than *QUOTINO; 1229 * sbfields arg may contain fields other than *QUOTINO;
1242 * VERSIONNUM for example. 1230 * VERSIONNUM for example.
@@ -1264,7 +1252,7 @@ xfs_qm_qino_alloc(
1264 xfs_mod_sb(tp, sbfields); 1252 xfs_mod_sb(tp, sbfields);
1265 1253
1266 if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) { 1254 if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
1267 xfs_fs_cmn_err(CE_ALERT, mp, "XFS qino_alloc failed!"); 1255 xfs_alert(mp, "%s failed (error %d)!", __func__, error);
1268 return error; 1256 return error;
1269 } 1257 }
1270 return 0; 1258 return 0;
@@ -1299,7 +1287,7 @@ xfs_qm_reset_dqcounts(
1299 * output any warnings because it's perfectly possible to 1287 * output any warnings because it's perfectly possible to
1300 * find uninitialised dquot blks. See comment in xfs_qm_dqcheck. 1288 * find uninitialised dquot blks. See comment in xfs_qm_dqcheck.
1301 */ 1289 */
1302 (void) xfs_qm_dqcheck(ddq, id+j, type, XFS_QMOPT_DQREPAIR, 1290 (void) xfs_qm_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR,
1303 "xfs_quotacheck"); 1291 "xfs_quotacheck");
1304 ddq->d_bcount = 0; 1292 ddq->d_bcount = 0;
1305 ddq->d_icount = 0; 1293 ddq->d_icount = 0;
@@ -1324,14 +1312,9 @@ xfs_qm_dqiter_bufs(
1324{ 1312{
1325 xfs_buf_t *bp; 1313 xfs_buf_t *bp;
1326 int error; 1314 int error;
1327 int notcommitted;
1328 int incr;
1329 int type; 1315 int type;
1330 1316
1331 ASSERT(blkcnt > 0); 1317 ASSERT(blkcnt > 0);
1332 notcommitted = 0;
1333 incr = (blkcnt > XFS_QM_MAX_DQCLUSTER_LOGSZ) ?
1334 XFS_QM_MAX_DQCLUSTER_LOGSZ : blkcnt;
1335 type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : 1318 type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
1336 (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP); 1319 (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
1337 error = 0; 1320 error = 0;
@@ -1676,7 +1659,7 @@ xfs_qm_quotacheck(
1676 */ 1659 */
1677 ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist)); 1660 ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist));
1678 1661
1679 cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname); 1662 xfs_notice(mp, "Quotacheck needed: Please wait.");
1680 1663
1681 /* 1664 /*
1682 * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset 1665 * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
@@ -1754,9 +1737,9 @@ xfs_qm_quotacheck(
1754 1737
1755 error_return: 1738 error_return:
1756 if (error) { 1739 if (error) {
1757 cmn_err(CE_WARN, "XFS quotacheck %s: Unsuccessful (Error %d): " 1740 xfs_warn(mp,
1758 "Disabling quotas.", 1741 "Quotacheck: Unsuccessful (Error %d): Disabling quotas.",
1759 mp->m_fsname, error); 1742 error);
1760 /* 1743 /*
1761 * We must turn off quotas. 1744 * We must turn off quotas.
1762 */ 1745 */
@@ -1764,12 +1747,11 @@ xfs_qm_quotacheck(
1764 ASSERT(xfs_Gqm != NULL); 1747 ASSERT(xfs_Gqm != NULL);
1765 xfs_qm_destroy_quotainfo(mp); 1748 xfs_qm_destroy_quotainfo(mp);
1766 if (xfs_mount_reset_sbqflags(mp)) { 1749 if (xfs_mount_reset_sbqflags(mp)) {
1767 cmn_err(CE_WARN, "XFS quotacheck %s: " 1750 xfs_warn(mp,
1768 "Failed to reset quota flags.", mp->m_fsname); 1751 "Quotacheck: Failed to reset quota flags.");
1769 } 1752 }
1770 } else { 1753 } else
1771 cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname); 1754 xfs_notice(mp, "Quotacheck: Done.");
1772 }
1773 return (error); 1755 return (error);
1774} 1756}
1775 1757
@@ -1937,8 +1919,8 @@ again:
1937 */ 1919 */
1938 error = xfs_qm_dqflush(dqp, 0); 1920 error = xfs_qm_dqflush(dqp, 0);
1939 if (error) { 1921 if (error) {
1940 xfs_fs_cmn_err(CE_WARN, mp, 1922 xfs_warn(mp, "%s: dquot %p flush failed",
1941 "xfs_qm_dqreclaim: dquot %p flush failed", dqp); 1923 __func__, dqp);
1942 } 1924 }
1943 goto dqunlock; 1925 goto dqunlock;
1944 } 1926 }
@@ -2115,7 +2097,7 @@ xfs_qm_write_sb_changes(
2115 int error; 2097 int error;
2116 2098
2117#ifdef QUOTADEBUG 2099#ifdef QUOTADEBUG
2118 cmn_err(CE_NOTE, "Writing superblock quota changes :%s", mp->m_fsname); 2100 xfs_notice(mp, "Writing superblock quota changes");
2119#endif 2101#endif
2120 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); 2102 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
2121 if ((error = xfs_trans_reserve(tp, 0, 2103 if ((error = xfs_trans_reserve(tp, 0,
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index c9446f1c726d..567b29b9f1b3 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -65,11 +65,6 @@ extern kmem_zone_t *qm_dqtrxzone;
65 * block in the dquot/xqm code. 65 * block in the dquot/xqm code.
66 */ 66 */
67#define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 67#define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1
68/*
69 * When doing a quotacheck, we log dquot clusters of this many FSBs at most
70 * in a single transaction. We don't want to ask for too huge a log reservation.
71 */
72#define XFS_QM_MAX_DQCLUSTER_LOGSZ 3
73 68
74typedef xfs_dqhash_t xfs_dqlist_t; 69typedef xfs_dqhash_t xfs_dqlist_t;
75 70
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index 45b5cb1788ab..a0a829addca9 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -119,8 +119,7 @@ xfs_qm_newmount(
119 (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) || 119 (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) ||
120 (!gquotaondisk && XFS_IS_OQUOTA_ON(mp))) && 120 (!gquotaondisk && XFS_IS_OQUOTA_ON(mp))) &&
121 xfs_dev_is_read_only(mp, "changing quota state")) { 121 xfs_dev_is_read_only(mp, "changing quota state")) {
122 cmn_err(CE_WARN, 122 xfs_warn(mp, "please mount with%s%s%s%s.",
123 "XFS: please mount with%s%s%s%s.",
124 (!quotaondisk ? "out quota" : ""), 123 (!quotaondisk ? "out quota" : ""),
125 (uquotaondisk ? " usrquota" : ""), 124 (uquotaondisk ? " usrquota" : ""),
126 (pquotaondisk ? " prjquota" : ""), 125 (pquotaondisk ? " prjquota" : ""),
@@ -135,7 +134,7 @@ xfs_qm_newmount(
135 */ 134 */
136 if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) { 135 if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) {
137 /* 136 /*
138 * If an error occured, qm_mount_quotas code 137 * If an error occurred, qm_mount_quotas code
139 * has already disabled quotas. So, just finish 138 * has already disabled quotas. So, just finish
140 * mounting, and get on with the boring life 139 * mounting, and get on with the boring life
141 * without disk quotas. 140 * without disk quotas.
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index bdebc183223e..2dadb15d5ca9 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -41,12 +41,6 @@
41#include "xfs_qm.h" 41#include "xfs_qm.h"
42#include "xfs_trace.h" 42#include "xfs_trace.h"
43 43
44#ifdef DEBUG
45# define qdprintk(s, args...) cmn_err(CE_DEBUG, s, ## args)
46#else
47# define qdprintk(s, args...) do { } while (0)
48#endif
49
50STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint); 44STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
51STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, 45STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
52 uint); 46 uint);
@@ -178,7 +172,7 @@ xfs_qm_scall_quotaoff(
178 /* 172 /*
179 * Next we make the changes in the quota flag in the mount struct. 173 * Next we make the changes in the quota flag in the mount struct.
180 * This isn't protected by a particular lock directly, because we 174 * This isn't protected by a particular lock directly, because we
181 * don't want to take a mrlock everytime we depend on quotas being on. 175 * don't want to take a mrlock every time we depend on quotas being on.
182 */ 176 */
183 mp->m_qflags &= ~(flags); 177 mp->m_qflags &= ~(flags);
184 178
@@ -294,7 +288,8 @@ xfs_qm_scall_trunc_qfiles(
294 int error = 0, error2 = 0; 288 int error = 0, error2 = 0;
295 289
296 if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { 290 if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) {
297 qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags); 291 xfs_debug(mp, "%s: flags=%x m_qflags=%x\n",
292 __func__, flags, mp->m_qflags);
298 return XFS_ERROR(EINVAL); 293 return XFS_ERROR(EINVAL);
299 } 294 }
300 295
@@ -318,20 +313,19 @@ xfs_qm_scall_quotaon(
318{ 313{
319 int error; 314 int error;
320 uint qf; 315 uint qf;
321 uint accflags;
322 __int64_t sbflags; 316 __int64_t sbflags;
323 317
324 flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); 318 flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
325 /* 319 /*
326 * Switching on quota accounting must be done at mount time. 320 * Switching on quota accounting must be done at mount time.
327 */ 321 */
328 accflags = flags & XFS_ALL_QUOTA_ACCT;
329 flags &= ~(XFS_ALL_QUOTA_ACCT); 322 flags &= ~(XFS_ALL_QUOTA_ACCT);
330 323
331 sbflags = 0; 324 sbflags = 0;
332 325
333 if (flags == 0) { 326 if (flags == 0) {
334 qdprintk("quotaon: zero flags, m_qflags=%x\n", mp->m_qflags); 327 xfs_debug(mp, "%s: zero flags, m_qflags=%x\n",
328 __func__, mp->m_qflags);
335 return XFS_ERROR(EINVAL); 329 return XFS_ERROR(EINVAL);
336 } 330 }
337 331
@@ -352,12 +346,13 @@ xfs_qm_scall_quotaon(
352 (flags & XFS_GQUOTA_ACCT) == 0 && 346 (flags & XFS_GQUOTA_ACCT) == 0 &&
353 (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 && 347 (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 &&
354 (flags & XFS_OQUOTA_ENFD))) { 348 (flags & XFS_OQUOTA_ENFD))) {
-355		qdprintk("Can't enforce without acct, flags=%x sbflags=%x\n",
-356			flags, mp->m_sb.sb_qflags);
+349		xfs_debug(mp,
+350			"%s: Can't enforce without acct, flags=%x sbflags=%x\n",
+351			__func__, flags, mp->m_sb.sb_qflags);
357 return XFS_ERROR(EINVAL); 352 return XFS_ERROR(EINVAL);
358 } 353 }
359 /* 354 /*
360 * If everything's upto-date incore, then don't waste time. 355 * If everything's up-to-date incore, then don't waste time.
361 */ 356 */
362 if ((mp->m_qflags & flags) == flags) 357 if ((mp->m_qflags & flags) == flags)
363 return XFS_ERROR(EEXIST); 358 return XFS_ERROR(EEXIST);
@@ -541,7 +536,7 @@ xfs_qm_scall_setqlim(
541 q->qi_bsoftlimit = soft; 536 q->qi_bsoftlimit = soft;
542 } 537 }
543 } else { 538 } else {
544 qdprintk("blkhard %Ld < blksoft %Ld\n", hard, soft); 539 xfs_debug(mp, "blkhard %Ld < blksoft %Ld\n", hard, soft);
545 } 540 }
546 hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ? 541 hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ?
547 (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) : 542 (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) :
@@ -557,7 +552,7 @@ xfs_qm_scall_setqlim(
557 q->qi_rtbsoftlimit = soft; 552 q->qi_rtbsoftlimit = soft;
558 } 553 }
559 } else { 554 } else {
560 qdprintk("rtbhard %Ld < rtbsoft %Ld\n", hard, soft); 555 xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld\n", hard, soft);
561 } 556 }
562 557
563 hard = (newlim->d_fieldmask & FS_DQ_IHARD) ? 558 hard = (newlim->d_fieldmask & FS_DQ_IHARD) ?
@@ -574,7 +569,7 @@ xfs_qm_scall_setqlim(
574 q->qi_isoftlimit = soft; 569 q->qi_isoftlimit = soft;
575 } 570 }
576 } else { 571 } else {
577 qdprintk("ihard %Ld < isoft %Ld\n", hard, soft); 572 xfs_debug(mp, "ihard %Ld < isoft %Ld\n", hard, soft);
578 } 573 }
579 574
580 /* 575 /*
@@ -939,10 +934,11 @@ struct mutex qcheck_lock;
939#define DQTEST_LIST_PRINT(l, NXT, title) \ 934#define DQTEST_LIST_PRINT(l, NXT, title) \
940{ \ 935{ \
941 xfs_dqtest_t *dqp; int i = 0;\ 936 xfs_dqtest_t *dqp; int i = 0;\
942 cmn_err(CE_DEBUG, "%s (#%d)", title, (int) (l)->qh_nelems); \ 937 xfs_debug(NULL, "%s (#%d)", title, (int) (l)->qh_nelems); \
943 for (dqp = (xfs_dqtest_t *)(l)->qh_next; dqp != NULL; \ 938 for (dqp = (xfs_dqtest_t *)(l)->qh_next; dqp != NULL; \
944 dqp = (xfs_dqtest_t *)dqp->NXT) { \ 939 dqp = (xfs_dqtest_t *)dqp->NXT) { \
945 cmn_err(CE_DEBUG, " %d. \"%d (%s)\" bcnt = %d, icnt = %d", \ 940 xfs_debug(dqp->q_mount, \
941 " %d. \"%d (%s)\" bcnt = %d, icnt = %d", \
946 ++i, dqp->d_id, DQFLAGTO_TYPESTR(dqp), \ 942 ++i, dqp->d_id, DQFLAGTO_TYPESTR(dqp), \
947 dqp->d_bcount, dqp->d_icount); } \ 943 dqp->d_bcount, dqp->d_icount); } \
948} 944}
@@ -966,16 +962,17 @@ xfs_qm_hashinsert(xfs_dqhash_t *h, xfs_dqtest_t *dqp)
966} 962}
967STATIC void 963STATIC void
968xfs_qm_dqtest_print( 964xfs_qm_dqtest_print(
969 xfs_dqtest_t *d) 965 struct xfs_mount *mp,
966 struct dqtest *d)
970{ 967{
971 cmn_err(CE_DEBUG, "-----------DQTEST DQUOT----------------"); 968 xfs_debug(mp, "-----------DQTEST DQUOT----------------");
972 cmn_err(CE_DEBUG, "---- dquot ID = %d", d->d_id); 969 xfs_debug(mp, "---- dquot ID = %d", d->d_id);
973 cmn_err(CE_DEBUG, "---- fs = 0x%p", d->q_mount); 970 xfs_debug(mp, "---- fs = 0x%p", d->q_mount);
974 cmn_err(CE_DEBUG, "---- bcount = %Lu (0x%x)", 971 xfs_debug(mp, "---- bcount = %Lu (0x%x)",
975 d->d_bcount, (int)d->d_bcount); 972 d->d_bcount, (int)d->d_bcount);
976 cmn_err(CE_DEBUG, "---- icount = %Lu (0x%x)", 973 xfs_debug(mp, "---- icount = %Lu (0x%x)",
977 d->d_icount, (int)d->d_icount); 974 d->d_icount, (int)d->d_icount);
978 cmn_err(CE_DEBUG, "---------------------------"); 975 xfs_debug(mp, "---------------------------");
979} 976}
980 977
981STATIC void 978STATIC void
@@ -989,12 +986,14 @@ xfs_qm_dqtest_failed(
989{ 986{
990 qmtest_nfails++; 987 qmtest_nfails++;
991 if (error) 988 if (error)
-992		cmn_err(CE_DEBUG, "quotacheck failed id=%d, err=%d\nreason: %s",
-993			d->d_id, error, reason);
+989		xfs_debug(dqp->q_mount,
+990			"quotacheck failed id=%d, err=%d\nreason: %s",
+991			d->d_id, error, reason);
 994	else					992	else
-995		cmn_err(CE_DEBUG, "quotacheck failed id=%d (%s) [%d != %d]",
-996			d->d_id, reason, (int)a, (int)b);
-997	xfs_qm_dqtest_print(d);
+993		xfs_debug(dqp->q_mount,
+994			"quotacheck failed id=%d (%s) [%d != %d]",
+995			d->d_id, reason, (int)a, (int)b);
+996	xfs_qm_dqtest_print(dqp->q_mount, d);
998 if (dqp) 997 if (dqp)
999 xfs_qm_dqprint(dqp); 998 xfs_qm_dqprint(dqp);
1000} 999}
@@ -1021,9 +1020,9 @@ xfs_dqtest_cmp2(
1021 be64_to_cpu(dqp->q_core.d_bcount) >= 1020 be64_to_cpu(dqp->q_core.d_bcount) >=
1022 be64_to_cpu(dqp->q_core.d_blk_softlimit)) { 1021 be64_to_cpu(dqp->q_core.d_blk_softlimit)) {
1023 if (!dqp->q_core.d_btimer && dqp->q_core.d_id) { 1022 if (!dqp->q_core.d_btimer && dqp->q_core.d_id) {
1024 cmn_err(CE_DEBUG, 1023 xfs_debug(dqp->q_mount,
1025 "%d [%s] [0x%p] BLK TIMER NOT STARTED", 1024 "%d [%s] BLK TIMER NOT STARTED",
1026 d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount); 1025 d->d_id, DQFLAGTO_TYPESTR(d));
1027 err++; 1026 err++;
1028 } 1027 }
1029 } 1028 }
@@ -1031,16 +1030,16 @@ xfs_dqtest_cmp2(
1031 be64_to_cpu(dqp->q_core.d_icount) >= 1030 be64_to_cpu(dqp->q_core.d_icount) >=
1032 be64_to_cpu(dqp->q_core.d_ino_softlimit)) { 1031 be64_to_cpu(dqp->q_core.d_ino_softlimit)) {
1033 if (!dqp->q_core.d_itimer && dqp->q_core.d_id) { 1032 if (!dqp->q_core.d_itimer && dqp->q_core.d_id) {
1034 cmn_err(CE_DEBUG, 1033 xfs_debug(dqp->q_mount,
1035 "%d [%s] [0x%p] INO TIMER NOT STARTED", 1034 "%d [%s] INO TIMER NOT STARTED",
1036 d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount); 1035 d->d_id, DQFLAGTO_TYPESTR(d));
1037 err++; 1036 err++;
1038 } 1037 }
1039 } 1038 }
1040#ifdef QUOTADEBUG 1039#ifdef QUOTADEBUG
1041 if (!err) { 1040 if (!err) {
1042 cmn_err(CE_DEBUG, "%d [%s] [0x%p] qchecked", 1041 xfs_debug(dqp->q_mount, "%d [%s] qchecked",
1043 d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount); 1042 d->d_id, DQFLAGTO_TYPESTR(d));
1044 } 1043 }
1045#endif 1044#endif
1046 return (err); 1045 return (err);
@@ -1137,8 +1136,8 @@ xfs_qm_internalqcheck_adjust(
1137 1136
1138 if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) { 1137 if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
1139 *res = BULKSTAT_RV_NOTHING; 1138 *res = BULKSTAT_RV_NOTHING;
1140 qdprintk("internalqcheck: ino=%llu, uqino=%llu, gqino=%llu\n", 1139 xfs_debug(mp, "%s: ino=%llu, uqino=%llu, gqino=%llu\n",
1141 (unsigned long long) ino, 1140 __func__, (unsigned long long) ino,
1142 (unsigned long long) mp->m_sb.sb_uquotino, 1141 (unsigned long long) mp->m_sb.sb_uquotino,
1143 (unsigned long long) mp->m_sb.sb_gquotino); 1142 (unsigned long long) mp->m_sb.sb_gquotino);
1144 return XFS_ERROR(EINVAL); 1143 return XFS_ERROR(EINVAL);
@@ -1223,12 +1222,12 @@ xfs_qm_internalqcheck(
1223 xfs_qm_internalqcheck_adjust, 1222 xfs_qm_internalqcheck_adjust,
1224 0, NULL, &done); 1223 0, NULL, &done);
1225 if (error) { 1224 if (error) {
1226 cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error); 1225 xfs_debug(mp, "Bulkstat returned error 0x%x", error);
1227 break; 1226 break;
1228 } 1227 }
1229 } while (!done); 1228 } while (!done);
1230 1229
1231 cmn_err(CE_DEBUG, "Checking results against system dquots"); 1230 xfs_debug(mp, "Checking results against system dquots");
1232 for (i = 0; i < qmtest_hashmask; i++) { 1231 for (i = 0; i < qmtest_hashmask; i++) {
1233 xfs_dqtest_t *d, *n; 1232 xfs_dqtest_t *d, *n;
1234 xfs_dqhash_t *h; 1233 xfs_dqhash_t *h;
@@ -1246,10 +1245,10 @@ xfs_qm_internalqcheck(
1246 } 1245 }
1247 1246
1248 if (qmtest_nfails) { 1247 if (qmtest_nfails) {
1249 cmn_err(CE_DEBUG, "******** quotacheck failed ********"); 1248 xfs_debug(mp, "******** quotacheck failed ********");
1250 cmn_err(CE_DEBUG, "failures = %d", qmtest_nfails); 1249 xfs_debug(mp, "failures = %d", qmtest_nfails);
1251 } else { 1250 } else {
1252 cmn_err(CE_DEBUG, "******** quotacheck successful! ********"); 1251 xfs_debug(mp, "******** quotacheck successful! ********");
1253 } 1252 }
1254 kmem_free(qmtest_udqtab); 1253 kmem_free(qmtest_udqtab);
1255 kmem_free(qmtest_gdqtab); 1254 kmem_free(qmtest_gdqtab);
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 7de91d1b75c0..2a3648731331 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -643,8 +643,9 @@ xfs_trans_dqresv(
643 (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) && 643 (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) &&
644 (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) { 644 (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) {
645#ifdef QUOTADEBUG 645#ifdef QUOTADEBUG
-646		cmn_err(CE_DEBUG, "BLK Res: nblks=%ld + resbcount=%Ld"
-647			" > hardlimit=%Ld?", nblks, *resbcountp, hardlimit);
+646		xfs_debug(mp,
+647			"BLK Res: nblks=%ld + resbcount=%Ld > hardlimit=%Ld?",
+648			nblks, *resbcountp, hardlimit);
648#endif 649#endif
649 if (nblks > 0) { 650 if (nblks > 0) {
650 /* 651 /*
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
deleted file mode 100644
index 0df88897ef84..000000000000
--- a/fs/xfs/support/debug.c
+++ /dev/null
@@ -1,107 +0,0 @@
1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include <xfs.h>
19#include "debug.h"
20
21/* xfs_mount.h drags a lot of crap in, sorry.. */
22#include "xfs_sb.h"
23#include "xfs_inum.h"
24#include "xfs_ag.h"
25#include "xfs_mount.h"
26#include "xfs_error.h"
27
28void
29cmn_err(
30 const char *lvl,
31 const char *fmt,
32 ...)
33{
34 struct va_format vaf;
35 va_list args;
36
37 va_start(args, fmt);
38 vaf.fmt = fmt;
39 vaf.va = &args;
40
41 printk("%s%pV", lvl, &vaf);
42 va_end(args);
43
44 BUG_ON(strncmp(lvl, KERN_EMERG, strlen(KERN_EMERG)) == 0);
45}
46
47void
48xfs_fs_cmn_err(
49 const char *lvl,
50 struct xfs_mount *mp,
51 const char *fmt,
52 ...)
53{
54 struct va_format vaf;
55 va_list args;
56
57 va_start(args, fmt);
58 vaf.fmt = fmt;
59 vaf.va = &args;
60
61 printk("%sFilesystem %s: %pV", lvl, mp->m_fsname, &vaf);
62 va_end(args);
63
64 BUG_ON(strncmp(lvl, KERN_EMERG, strlen(KERN_EMERG)) == 0);
65}
66
67/* All callers to xfs_cmn_err use CE_ALERT, so don't bother testing lvl */
68void
69xfs_cmn_err(
70 int panic_tag,
71 const char *lvl,
72 struct xfs_mount *mp,
73 const char *fmt,
74 ...)
75{
76 struct va_format vaf;
77 va_list args;
78 int do_panic = 0;
79
80 if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
81 printk(KERN_ALERT "XFS: Transforming an alert into a BUG.");
82 do_panic = 1;
83 }
84
85 va_start(args, fmt);
86 vaf.fmt = fmt;
87 vaf.va = &args;
88
89 printk(KERN_ALERT "Filesystem %s: %pV", mp->m_fsname, &vaf);
90 va_end(args);
91
92 BUG_ON(do_panic);
93}
94
95void
96assfail(char *expr, char *file, int line)
97{
98 printk(KERN_CRIT "Assertion failed: %s, file: %s, line: %d\n", expr,
99 file, line);
100 BUG();
101}
102
103void
104xfs_hex_dump(void *p, int length)
105{
106 print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1);
107}
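The deleted file is the last piece of the cmn_err() layer. Its one lasting trick, forwarding a variadic format through printk's %pV extension via struct va_format, is the same mechanism the replacement xfs_warn()/xfs_notice()/xfs_alert() helpers rely on to add the "Filesystem %s:" prefix without an intermediate buffer. A standalone sketch of that pattern (hypothetical helper name):

#include <linux/kernel.h>
#include <stdarg.h>

static void my_warn(const char *name, const char *fmt, ...)
	__attribute__ ((format (printf, 2, 3)));

/*
 * Prefix every message with an identifier while forwarding the
 * caller's format string and arguments to printk via %pV - no
 * vsnprintf() into a temporary buffer required.
 */
static void my_warn(const char *name, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	printk(KERN_WARNING "%s: %pV\n", name, &vaf);
	va_end(args);
}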
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
deleted file mode 100644
index 05699f67d475..000000000000
--- a/fs/xfs/support/debug.h
+++ /dev/null
@@ -1,61 +0,0 @@
1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_SUPPORT_DEBUG_H__
19#define __XFS_SUPPORT_DEBUG_H__
20
21#include <stdarg.h>
22
23struct xfs_mount;
24
25#define CE_DEBUG KERN_DEBUG
26#define CE_CONT KERN_INFO
27#define CE_NOTE KERN_NOTICE
28#define CE_WARN KERN_WARNING
29#define CE_ALERT KERN_ALERT
30#define CE_PANIC KERN_EMERG
31
32void cmn_err(const char *lvl, const char *fmt, ...)
33 __attribute__ ((format (printf, 2, 3)));
34void xfs_fs_cmn_err( const char *lvl, struct xfs_mount *mp,
35 const char *fmt, ...) __attribute__ ((format (printf, 3, 4)));
36void xfs_cmn_err( int panic_tag, const char *lvl, struct xfs_mount *mp,
37 const char *fmt, ...) __attribute__ ((format (printf, 4, 5)));
38
39extern void assfail(char *expr, char *f, int l);
40
41#define ASSERT_ALWAYS(expr) \
42 (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
43
44#ifndef DEBUG
45#define ASSERT(expr) ((void)0)
46
47#ifndef STATIC
48# define STATIC static noinline
49#endif
50
51#else /* DEBUG */
52
53#define ASSERT(expr) \
54 (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
55
56#ifndef STATIC
57# define STATIC noinline
58#endif
59
60#endif /* DEBUG */
61#endif /* __XFS_SUPPORT_DEBUG_H__ */
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index f3227984a9bf..27d64d752eab 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -147,10 +147,9 @@ xfs_alloc_get_rec(
147 */ 147 */
148STATIC void 148STATIC void
149xfs_alloc_compute_aligned( 149xfs_alloc_compute_aligned(
150 xfs_alloc_arg_t *args, /* allocation argument structure */
150 xfs_agblock_t foundbno, /* starting block in found extent */ 151 xfs_agblock_t foundbno, /* starting block in found extent */
151 xfs_extlen_t foundlen, /* length in found extent */ 152 xfs_extlen_t foundlen, /* length in found extent */
152 xfs_extlen_t alignment, /* alignment for allocation */
153 xfs_extlen_t minlen, /* minimum length for allocation */
154 xfs_agblock_t *resbno, /* result block number */ 153 xfs_agblock_t *resbno, /* result block number */
155 xfs_extlen_t *reslen) /* result length */ 154 xfs_extlen_t *reslen) /* result length */
156{ 155{
@@ -158,8 +157,8 @@ xfs_alloc_compute_aligned(
158 xfs_extlen_t diff; 157 xfs_extlen_t diff;
159 xfs_extlen_t len; 158 xfs_extlen_t len;
160 159
161 if (alignment > 1 && foundlen >= minlen) { 160 if (args->alignment > 1 && foundlen >= args->minlen) {
162 bno = roundup(foundbno, alignment); 161 bno = roundup(foundbno, args->alignment);
163 diff = bno - foundbno; 162 diff = bno - foundbno;
164 len = diff >= foundlen ? 0 : foundlen - diff; 163 len = diff >= foundlen ? 0 : foundlen - diff;
165 } else { 164 } else {
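This signature change swaps the loose scalars (alignment, minlen) for the xfs_alloc_arg_t they were peeled from; the call sites below shrink accordingly and can no longer pass the fields in the wrong order. The shape of the refactor on a toy argument block (hypothetical names, mirroring the rounding logic shown in the hunk):

#include <linux/kernel.h>	/* roundup() */

/* Hypothetical stand-in for xfs_alloc_arg_t. */
struct alloc_args {
	unsigned int	alignment;	/* required alignment */
	unsigned int	minlen;		/* minimum acceptable length */
	/* ... many more fields in the real structure ... */
};

/*
 * Before: compute_aligned(bno, len, alignment, minlen, &rbno, &rlen)
 * After:  the argument block travels as a single pointer.
 */
static void
compute_aligned(const struct alloc_args *args,
		unsigned int bno, unsigned int len,
		unsigned int *resbno, unsigned int *reslen)
{
	if (args->alignment > 1 && len >= args->minlen) {
		/* round bno up to the alignment, shrink len to match */
		unsigned int aligned = roundup(bno, args->alignment);
		unsigned int diff = aligned - bno;

		*resbno = aligned;
		*reslen = diff >= len ? 0 : len - diff;
	} else {
		*resbno = bno;
		*reslen = len;
	}
}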
@@ -464,6 +463,27 @@ xfs_alloc_read_agfl(
464 return 0; 463 return 0;
465} 464}
466 465
466STATIC int
467xfs_alloc_update_counters(
468 struct xfs_trans *tp,
469 struct xfs_perag *pag,
470 struct xfs_buf *agbp,
471 long len)
472{
473 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
474
475 pag->pagf_freeblks += len;
476 be32_add_cpu(&agf->agf_freeblks, len);
477
478 xfs_trans_agblocks_delta(tp, len);
479 if (unlikely(be32_to_cpu(agf->agf_freeblks) >
480 be32_to_cpu(agf->agf_length)))
481 return EFSCORRUPTED;
482
483 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
484 return 0;
485}
486
467/* 487/*
468 * Allocation group level functions. 488 * Allocation group level functions.
469 */ 489 */
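xfs_alloc_update_counters() folds the duplicated "bump the in-core and on-disk free counts, then log the AGF" sequences from the alloc and free paths into one helper, and turns the old ASSERT into an EFSCORRUPTED return: an oversized freeblks count means corrupt media, not a programmer error. The on-disk field stays big-endian throughout thanks to be32_add_cpu(). The core of the idea on a hypothetical structure (-EUCLEAN is the errno EFSCORRUPTED maps to):

#include <linux/errno.h>
#include <linux/types.h>
#include <asm/byteorder.h>	/* be32_add_cpu(), be32_to_cpu() */

struct group_hdr {
	__be32	freeblks;	/* free blocks, big-endian on disk */
	__be32	length;		/* total blocks, big-endian on disk */
};

/* Apply a signed delta in place, then sanity-check the result. */
static int update_freeblks(struct group_hdr *hdr, long delta)
{
	be32_add_cpu(&hdr->freeblks, delta);

	/* Corrupt metadata is an I/O-path error, not an ASSERT. */
	if (be32_to_cpu(hdr->freeblks) > be32_to_cpu(hdr->length))
		return -EUCLEAN;

	return 0;
}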
@@ -505,49 +525,44 @@ xfs_alloc_ag_vextent(
505 ASSERT(0); 525 ASSERT(0);
506 /* NOTREACHED */ 526 /* NOTREACHED */
507 } 527 }
508 if (error) 528
529 if (error || args->agbno == NULLAGBLOCK)
509 return error; 530 return error;
510 /*
511 * If the allocation worked, need to change the agf structure
512 * (and log it), and the superblock.
513 */
514 if (args->agbno != NULLAGBLOCK) {
515 xfs_agf_t *agf; /* allocation group freelist header */
516 long slen = (long)args->len;
517 531
518 ASSERT(args->len >= args->minlen && args->len <= args->maxlen); 532 ASSERT(args->len >= args->minlen);
519 ASSERT(!(args->wasfromfl) || !args->isfl); 533 ASSERT(args->len <= args->maxlen);
520 ASSERT(args->agbno % args->alignment == 0); 534 ASSERT(!args->wasfromfl || !args->isfl);
521 if (!(args->wasfromfl)) { 535 ASSERT(args->agbno % args->alignment == 0);
522 536
523 agf = XFS_BUF_TO_AGF(args->agbp); 537 if (!args->wasfromfl) {
524 be32_add_cpu(&agf->agf_freeblks, -(args->len)); 538 error = xfs_alloc_update_counters(args->tp, args->pag,
525 xfs_trans_agblocks_delta(args->tp, 539 args->agbp,
526 -((long)(args->len))); 540 -((long)(args->len)));
527 args->pag->pagf_freeblks -= args->len; 541 if (error)
528 ASSERT(be32_to_cpu(agf->agf_freeblks) <= 542 return error;
529 be32_to_cpu(agf->agf_length)); 543
530 xfs_alloc_log_agf(args->tp, args->agbp, 544 /*
531 XFS_AGF_FREEBLKS); 545 * Search the busylist for these blocks and mark the
532 /* 546 * transaction as synchronous if blocks are found. This
533 * Search the busylist for these blocks and mark the 547 * avoids the need to block due to a synchronous log
534 * transaction as synchronous if blocks are found. This 548 * force to ensure correct ordering as the synchronous
535 * avoids the need to block due to a synchronous log 549 * transaction will guarantee that for us.
536 * force to ensure correct ordering as the synchronous 550 */
537 * transaction will guarantee that for us. 551 if (xfs_alloc_busy_search(args->mp, args->agno,
538 */ 552 args->agbno, args->len))
539 if (xfs_alloc_busy_search(args->mp, args->agno, 553 xfs_trans_set_sync(args->tp);
540 args->agbno, args->len))
541 xfs_trans_set_sync(args->tp);
542 }
543 if (!args->isfl)
544 xfs_trans_mod_sb(args->tp,
545 args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS :
546 XFS_TRANS_SB_FDBLOCKS, -slen);
547 XFS_STATS_INC(xs_allocx);
548 XFS_STATS_ADD(xs_allocb, args->len);
549 } 554 }
550 return 0; 555
556 if (!args->isfl) {
557 xfs_trans_mod_sb(args->tp, args->wasdel ?
558 XFS_TRANS_SB_RES_FDBLOCKS :
559 XFS_TRANS_SB_FDBLOCKS,
560 -((long)(args->len)));
561 }
562
563 XFS_STATS_INC(xs_allocx);
564 XFS_STATS_ADD(xs_allocb, args->len);
565 return error;
551} 566}
552 567
553/* 568/*
@@ -693,8 +708,7 @@ xfs_alloc_find_best_extent(
693 if (error) 708 if (error)
694 goto error0; 709 goto error0;
695 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 710 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
696 xfs_alloc_compute_aligned(*sbno, *slen, args->alignment, 711 xfs_alloc_compute_aligned(args, *sbno, *slen, &bno, slena);
697 args->minlen, &bno, slena);
698 712
699 /* 713 /*
700 * The good extent is closer than this one. 714 * The good extent is closer than this one.
@@ -866,8 +880,8 @@ xfs_alloc_ag_vextent_near(
866 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i))) 880 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
867 goto error0; 881 goto error0;
868 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 882 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
869 xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment, 883 xfs_alloc_compute_aligned(args, ltbno, ltlen,
870 args->minlen, &ltbnoa, &ltlena); 884 &ltbnoa, &ltlena);
871 if (ltlena < args->minlen) 885 if (ltlena < args->minlen)
872 continue; 886 continue;
873 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 887 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
@@ -987,8 +1001,8 @@ xfs_alloc_ag_vextent_near(
987 if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i))) 1001 if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
988 goto error0; 1002 goto error0;
989 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1003 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
990 xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment, 1004 xfs_alloc_compute_aligned(args, ltbno, ltlen,
991 args->minlen, &ltbnoa, &ltlena); 1005 &ltbnoa, &ltlena);
992 if (ltlena >= args->minlen) 1006 if (ltlena >= args->minlen)
993 break; 1007 break;
994 if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) 1008 if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
@@ -1003,8 +1017,8 @@ xfs_alloc_ag_vextent_near(
1003 if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i))) 1017 if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
1004 goto error0; 1018 goto error0;
1005 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1019 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1006 xfs_alloc_compute_aligned(gtbno, gtlen, args->alignment, 1020 xfs_alloc_compute_aligned(args, gtbno, gtlen,
1007 args->minlen, &gtbnoa, &gtlena); 1021 &gtbnoa, &gtlena);
1008 if (gtlena >= args->minlen) 1022 if (gtlena >= args->minlen)
1009 break; 1023 break;
1010 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) 1024 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
@@ -1183,8 +1197,7 @@ xfs_alloc_ag_vextent_size(
1183 * once aligned; if not, we search left for something better. 1197 * once aligned; if not, we search left for something better.
1184 * This can't happen in the second case above. 1198 * This can't happen in the second case above.
1185 */ 1199 */
1186 xfs_alloc_compute_aligned(fbno, flen, args->alignment, args->minlen, 1200 xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
1187 &rbno, &rlen);
1188 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); 1201 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1189 XFS_WANT_CORRUPTED_GOTO(rlen == 0 || 1202 XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
1190 (rlen <= flen && rbno + rlen <= fbno + flen), error0); 1203 (rlen <= flen && rbno + rlen <= fbno + flen), error0);
@@ -1209,8 +1222,8 @@ xfs_alloc_ag_vextent_size(
1209 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1222 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1210 if (flen < bestrlen) 1223 if (flen < bestrlen)
1211 break; 1224 break;
1212 xfs_alloc_compute_aligned(fbno, flen, args->alignment, 1225 xfs_alloc_compute_aligned(args, fbno, flen,
1213 args->minlen, &rbno, &rlen); 1226 &rbno, &rlen);
1214 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); 1227 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1215 XFS_WANT_CORRUPTED_GOTO(rlen == 0 || 1228 XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
1216 (rlen <= flen && rbno + rlen <= fbno + flen), 1229 (rlen <= flen && rbno + rlen <= fbno + flen),
@@ -1388,6 +1401,7 @@ xfs_free_ag_extent(
1388 xfs_mount_t *mp; /* mount point struct for filesystem */ 1401 xfs_mount_t *mp; /* mount point struct for filesystem */
1389 xfs_agblock_t nbno; /* new starting block of freespace */ 1402 xfs_agblock_t nbno; /* new starting block of freespace */
1390 xfs_extlen_t nlen; /* new length of freespace */ 1403 xfs_extlen_t nlen; /* new length of freespace */
1404 xfs_perag_t *pag; /* per allocation group data */
1391 1405
1392 mp = tp->t_mountp; 1406 mp = tp->t_mountp;
1393 /* 1407 /*
@@ -1586,30 +1600,20 @@ xfs_free_ag_extent(
1586 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1600 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1587 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1601 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1588 cnt_cur = NULL; 1602 cnt_cur = NULL;
1603
1589 /* 1604 /*
1590 * Update the freespace totals in the ag and superblock. 1605 * Update the freespace totals in the ag and superblock.
1591 */ 1606 */
1592 { 1607 pag = xfs_perag_get(mp, agno);
1593 xfs_agf_t *agf; 1608 error = xfs_alloc_update_counters(tp, pag, agbp, len);
1594 xfs_perag_t *pag; /* per allocation group data */ 1609 xfs_perag_put(pag);
1595 1610 if (error)
1596 pag = xfs_perag_get(mp, agno); 1611 goto error0;
1597 pag->pagf_freeblks += len; 1612
1598 xfs_perag_put(pag); 1613 if (!isfl)
1599 1614 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
1600 agf = XFS_BUF_TO_AGF(agbp); 1615 XFS_STATS_INC(xs_freex);
1601 be32_add_cpu(&agf->agf_freeblks, len); 1616 XFS_STATS_ADD(xs_freeb, len);
1602 xfs_trans_agblocks_delta(tp, len);
1603 XFS_WANT_CORRUPTED_GOTO(
1604 be32_to_cpu(agf->agf_freeblks) <=
1605 be32_to_cpu(agf->agf_length),
1606 error0);
1607 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
1608 if (!isfl)
1609 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
1610 XFS_STATS_INC(xs_freex);
1611 XFS_STATS_ADD(xs_freeb, len);
1612 }
1613 1617
1614 trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); 1618 trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
1615 1619
@@ -2391,17 +2395,33 @@ xfs_free_extent(
2391 memset(&args, 0, sizeof(xfs_alloc_arg_t)); 2395 memset(&args, 0, sizeof(xfs_alloc_arg_t));
2392 args.tp = tp; 2396 args.tp = tp;
2393 args.mp = tp->t_mountp; 2397 args.mp = tp->t_mountp;
2398
2399 /*
2400 * validate that the block number is legal - this enables us to detect
2401 * and handle a silent filesystem corruption rather than crashing.
2402 */
2394 args.agno = XFS_FSB_TO_AGNO(args.mp, bno); 2403 args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
2395 ASSERT(args.agno < args.mp->m_sb.sb_agcount); 2404 if (args.agno >= args.mp->m_sb.sb_agcount)
2405 return EFSCORRUPTED;
2406
2396 args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); 2407 args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
2408 if (args.agbno >= args.mp->m_sb.sb_agblocks)
2409 return EFSCORRUPTED;
2410
2397 args.pag = xfs_perag_get(args.mp, args.agno); 2411 args.pag = xfs_perag_get(args.mp, args.agno);
2398 if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING))) 2412 ASSERT(args.pag);
2413
2414 error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
2415 if (error)
2399 goto error0; 2416 goto error0;
2400#ifdef DEBUG 2417
2401 ASSERT(args.agbp != NULL); 2418 /* validate the extent size is legal now we have the agf locked */
2402 ASSERT((args.agbno + len) <= 2419 if (args.agbno + len >
2403 be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)); 2420 be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) {
2404#endif 2421 error = EFSCORRUPTED;
2422 goto error0;
2423 }
2424
2405 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); 2425 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
2406error0: 2426error0:
2407 xfs_perag_put(args.pag); 2427 xfs_perag_put(args.pag);
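xfs_free_extent() now treats the incoming block number as untrusted input: the AG number and offset are range-checked (returning EFSCORRUPTED) before any per-AG structure is dereferenced, and the extent length is revalidated once the AGF is locked, where debug builds previously just ASSERTed. The defensive shape with hypothetical geometry helpers (again using -EUCLEAN for EFSCORRUPTED):

#include <linux/errno.h>
#include <linux/types.h>

/*
 * Hypothetical geometry: blocks are grouped; a 64-bit block number
 * decomposes into a group index and an offset within that group.
 */
static int free_extent_checked(u64 bno, u32 len,
			       u32 group_count, u32 group_blocks)
{
	u32 group = (u32)(bno / group_blocks);
	u32 offset = (u32)(bno % group_blocks);

	/* Reject out-of-range input instead of crashing on it later. */
	if (group >= group_count)
		return -EUCLEAN;
	if ((u64)offset + len > group_blocks)
		return -EUCLEAN;

	/* ... lock the group header, then free [offset, offset + len) ... */
	return 0;
}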
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index dc3afd7739ff..fa00788de2f5 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2365,6 +2365,13 @@ xfs_bmap_rtalloc(
2365 */ 2365 */
2366 if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN) 2366 if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN)
2367 ralen = MAXEXTLEN / mp->m_sb.sb_rextsize; 2367 ralen = MAXEXTLEN / mp->m_sb.sb_rextsize;
2368
2369 /*
2370 * Lock out other modifications to the RT bitmap inode.
2371 */
2372 xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
2373 xfs_trans_ijoin_ref(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
2374
2368 /* 2375 /*
2369 * If it's an allocation to an empty file at offset 0, 2376 * If it's an allocation to an empty file at offset 0,
2370 * pick an extent that will space things out in the rt area. 2377 * pick an extent that will space things out in the rt area.
@@ -3519,7 +3526,7 @@ xfs_bmap_search_extents(
3519 3526
3520 if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) && 3527 if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) &&
3521 !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) { 3528 !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) {
3522 xfs_cmn_err(XFS_PTAG_FSBLOCK_ZERO, CE_ALERT, ip->i_mount, 3529 xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
3523 "Access to block zero in inode %llu " 3530 "Access to block zero in inode %llu "
3524 "start_block: %llx start_off: %llx " 3531 "start_block: %llx start_off: %llx "
3525 "blkcnt: %llx extent-state: %x lastx: %x\n", 3532 "blkcnt: %llx extent-state: %x lastx: %x\n",
@@ -4193,12 +4200,11 @@ xfs_bmap_read_extents(
4193 num_recs = xfs_btree_get_numrecs(block); 4200 num_recs = xfs_btree_get_numrecs(block);
4194 if (unlikely(i + num_recs > room)) { 4201 if (unlikely(i + num_recs > room)) {
4195 ASSERT(i + num_recs <= room); 4202 ASSERT(i + num_recs <= room);
4196 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 4203 xfs_warn(ip->i_mount,
4197 "corrupt dinode %Lu, (btree extents).", 4204 "corrupt dinode %Lu, (btree extents).",
4198 (unsigned long long) ip->i_ino); 4205 (unsigned long long) ip->i_ino);
4199 XFS_ERROR_REPORT("xfs_bmap_read_extents(1)", 4206 XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)",
4200 XFS_ERRLEVEL_LOW, 4207 XFS_ERRLEVEL_LOW, ip->i_mount, block);
4201 ip->i_mount);
4202 goto error0; 4208 goto error0;
4203 } 4209 }
4204 XFS_WANT_CORRUPTED_GOTO( 4210 XFS_WANT_CORRUPTED_GOTO(
@@ -5772,7 +5778,7 @@ xfs_check_block(
5772 else 5778 else
5773 thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr); 5779 thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
5774 if (*thispa == *pp) { 5780 if (*thispa == *pp) {
5775 cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld", 5781 xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
5776 __func__, j, i, 5782 __func__, j, i,
5777 (unsigned long long)be64_to_cpu(*thispa)); 5783 (unsigned long long)be64_to_cpu(*thispa));
5778 panic("%s: ptrs are equal in node\n", 5784 panic("%s: ptrs are equal in node\n",
@@ -5937,11 +5943,11 @@ xfs_bmap_check_leaf_extents(
5937 return; 5943 return;
5938 5944
5939error0: 5945error0:
5940 cmn_err(CE_WARN, "%s: at error0", __func__); 5946 xfs_warn(mp, "%s: at error0", __func__);
5941 if (bp_release) 5947 if (bp_release)
5942 xfs_trans_brelse(NULL, bp); 5948 xfs_trans_brelse(NULL, bp);
5943error_norelse: 5949error_norelse:
5944 cmn_err(CE_WARN, "%s: BAD after btree leaves for %d extents", 5950 xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
5945 __func__, i); 5951 __func__, i);
5946 panic("%s: CORRUPTED BTREE OR SOMETHING", __func__); 5952 panic("%s: CORRUPTED BTREE OR SOMETHING", __func__);
5947 return; 5953 return;
@@ -6144,7 +6150,7 @@ xfs_bmap_punch_delalloc_range(
6144 if (error) { 6150 if (error) {
6145 /* something screwed, just bail */ 6151 /* something screwed, just bail */
6146 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 6152 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
6147 xfs_fs_cmn_err(CE_ALERT, ip->i_mount, 6153 xfs_alert(ip->i_mount,
6148 "Failed delalloc mapping lookup ino %lld fsb %lld.", 6154 "Failed delalloc mapping lookup ino %lld fsb %lld.",
6149 ip->i_ino, start_fsb); 6155 ip->i_ino, start_fsb);
6150 } 6156 }
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 6f8c21ce0d6d..7b7e005e3dcc 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -130,10 +130,12 @@ xfs_buf_item_log_check(
 	orig = bip->bli_orig;
 	buffer = XFS_BUF_PTR(bp);
 	for (x = 0; x < XFS_BUF_COUNT(bp); x++) {
-		if (orig[x] != buffer[x] && !btst(bip->bli_logged, x))
-			cmn_err(CE_PANIC,
-	"xfs_buf_item_log_check bip %x buffer %x orig %x index %d",
-				bip, bp, orig, x);
+		if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) {
+			xfs_emerg(bp->b_mount,
+				"%s: bip %x buffer %x orig %x index %d",
+				__func__, bip, bp, orig, x);
+			ASSERT(0);
+		}
 	}
 }
 #else
@@ -983,15 +985,14 @@ xfs_buf_iodone_callbacks(
 	if (XFS_BUF_TARGET(bp) != lasttarg ||
 	    time_after(jiffies, (lasttime + 5*HZ))) {
 		lasttime = jiffies;
-		cmn_err(CE_ALERT, "Device %s, XFS metadata write error"
-			" block 0x%llx in %s",
+		xfs_alert(mp, "Device %s: metadata write error block 0x%llx",
 			XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
-			(__uint64_t)XFS_BUF_ADDR(bp), mp->m_fsname);
+			(__uint64_t)XFS_BUF_ADDR(bp));
 	}
 	lasttarg = XFS_BUF_TARGET(bp);

 	/*
-	 * If the write was asynchronous then noone will be looking for the
+	 * If the write was asynchronous then no one will be looking for the
 	 * error.  Clear the error state and write the buffer out again.
 	 *
 	 * During sync or umount we'll write all pending buffers again
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 1c00bedb3175..6102ac6d1dff 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1995,13 +1995,12 @@ xfs_da_do_buf(
 	error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED);
 	if (unlikely(error == EFSCORRUPTED)) {
 		if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
-			cmn_err(CE_ALERT, "xfs_da_do_buf: bno %lld\n",
-				(long long)bno);
-			cmn_err(CE_ALERT, "dir: inode %lld\n",
+			xfs_alert(mp, "%s: bno %lld dir: inode %lld",
+				__func__, (long long)bno,
 				(long long)dp->i_ino);
 			for (i = 0; i < nmap; i++) {
-				cmn_err(CE_ALERT,
-	"[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d\n",
+				xfs_alert(mp,
+"[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d",
 					i,
 					(long long)mapp[i].br_startoff,
 					(long long)mapp[i].br_startblock,
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index e60490bc00a6..be628677c288 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -270,9 +270,9 @@ xfs_swap_extents(
 	/* check inode formats now that data is flushed */
 	error = xfs_swap_extents_check_format(ip, tip);
 	if (error) {
-		xfs_fs_cmn_err(CE_NOTE, mp,
+		xfs_notice(mp,
 		    "%s: inode 0x%llx format is incompatible for exchanging.",
-				__FILE__, ip->i_ino);
+				__func__, ip->i_ino);
 		goto out_unlock;
 	}

diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index a1321bc7f192..dba7a71cedf3 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -159,7 +159,7 @@ xfs_dir_ino_validate(
 		XFS_AGINO_TO_INO(mp, agno, agino) == ino;
 	if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE,
 			XFS_RANDOM_DIR_INO_VALIDATE))) {
-		xfs_fs_cmn_err(CE_WARN, mp, "Invalid inode number 0x%Lx",
+		xfs_warn(mp, "Invalid inode number 0x%Lx",
 				(unsigned long long) ino);
 		XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp);
 		return XFS_ERROR(EFSCORRUPTED);
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index f9a0864b696a..a0aab7d3294f 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -899,10 +899,9 @@ xfs_dir2_leafn_rebalance(
 	if(blk2->index < 0) {
 		state->inleaf = 1;
 		blk2->index = 0;
-		cmn_err(CE_ALERT,
-	"xfs_dir2_leafn_rebalance: picked the wrong leaf? reverting original leaf: "
-	"blk1->index %d\n",
-			blk1->index);
+		xfs_alert(args->dp->i_mount,
+	"%s: picked the wrong leaf? reverting original leaf: blk1->index %d\n",
+			__func__, blk1->index);
 	}
 }

@@ -1641,26 +1640,22 @@ xfs_dir2_node_addname_int(
 	}

 	if (unlikely(xfs_dir2_db_to_fdb(mp, dbno) != fbno)) {
-		cmn_err(CE_ALERT,
-			"xfs_dir2_node_addname_int: dir ino "
-			"%llu needed freesp block %lld for\n"
-			"  data block %lld, got %lld\n"
-			"  ifbno %llu lastfbno %d\n",
-			(unsigned long long)dp->i_ino,
+		xfs_alert(mp,
+			"%s: dir ino " "%llu needed freesp block %lld for\n"
+			"  data block %lld, got %lld ifbno %llu lastfbno %d",
+			__func__, (unsigned long long)dp->i_ino,
 			(long long)xfs_dir2_db_to_fdb(mp, dbno),
 			(long long)dbno, (long long)fbno,
 			(unsigned long long)ifbno, lastfbno);
 		if (fblk) {
-			cmn_err(CE_ALERT,
-				" fblk 0x%p blkno %llu "
-				"index %d magic 0x%x\n",
+			xfs_alert(mp,
+				" fblk 0x%p blkno %llu index %d magic 0x%x",
 				fblk,
 				(unsigned long long)fblk->blkno,
 				fblk->index,
 				fblk->magic);
 		} else {
-			cmn_err(CE_ALERT,
-				" ... fblk is NULL\n");
+			xfs_alert(mp, " ... fblk is NULL");
 		}
 		XFS_ERROR_REPORT("xfs_dir2_node_addname_int",
 				 XFS_ERRLEVEL_LOW, mp);
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 4c7db74a05f7..39f06336b99d 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -48,7 +48,7 @@ xfs_error_trap(int e)
 			break;
 		if (e != xfs_etrap[i])
 			continue;
-		cmn_err(CE_NOTE, "xfs_error_trap: error %d", e);
+		xfs_notice(NULL, "%s: error %d", __func__, e);
 		BUG();
 		break;
 	}
@@ -74,7 +74,7 @@ xfs_error_test(int error_tag, int *fsidp, char *expression,

 	for (i = 0; i < XFS_NUM_INJECT_ERROR; i++)  {
 		if (xfs_etest[i] == error_tag && xfs_etest_fsid[i] == fsid) {
-			cmn_err(CE_WARN,
+			xfs_warn(NULL,
 	"Injecting error (%s) at file %s, line %d, on filesystem \"%s\"",
 				expression, file, line, xfs_etest_fsname[i]);
 			return 1;
@@ -95,14 +95,14 @@ xfs_errortag_add(int error_tag, xfs_mount_t *mp)

 	for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) {
 		if (xfs_etest_fsid[i] == fsid && xfs_etest[i] == error_tag) {
-			cmn_err(CE_WARN, "XFS error tag #%d on", error_tag);
+			xfs_warn(mp, "error tag #%d on", error_tag);
 			return 0;
 		}
 	}

 	for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) {
 		if (xfs_etest[i] == 0) {
-			cmn_err(CE_WARN, "Turned on XFS error tag #%d",
+			xfs_warn(mp, "Turned on XFS error tag #%d",
 				error_tag);
 			xfs_etest[i] = error_tag;
 			xfs_etest_fsid[i] = fsid;
@@ -114,7 +114,7 @@ xfs_errortag_add(int error_tag, xfs_mount_t *mp)
 		}
 	}

-	cmn_err(CE_WARN, "error tag overflow, too many turned on");
+	xfs_warn(mp, "error tag overflow, too many turned on");

 	return 1;
 }
@@ -133,7 +133,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud)
 		if ((fsid == 0LL || xfs_etest_fsid[i] == fsid) &&
 		     xfs_etest[i] != 0) {
 			cleared = 1;
-			cmn_err(CE_WARN, "Clearing XFS error tag #%d",
+			xfs_warn(mp, "Clearing XFS error tag #%d",
 				xfs_etest[i]);
 			xfs_etest[i] = 0;
 			xfs_etest_fsid[i] = 0LL;
@@ -144,9 +144,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud)
 	}

 	if (loud || cleared)
-		cmn_err(CE_WARN,
-			"Cleared all XFS error tags for filesystem \"%s\"",
-			mp->m_fsname);
+		xfs_warn(mp, "Cleared all XFS error tags for filesystem");

 	return 0;
 }
@@ -162,9 +160,8 @@ xfs_error_report(
 	inst_t			*ra)
 {
 	if (level <= xfs_error_level) {
-		xfs_cmn_err(XFS_PTAG_ERROR_REPORT,
-			CE_ALERT, mp,
-		"XFS internal error %s at line %d of file %s.  Caller 0x%p\n",
+		xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT,
+		"Internal error %s at line %d of file %s.  Caller 0x%p\n",
 			tag, linenum, filename, ra);

 		xfs_stack_trace();
@@ -184,4 +181,5 @@ xfs_corruption_error(
 	if (level <= xfs_error_level)
 		xfs_hex_dump(p, 16);
 	xfs_error_report(tag, level, mp, filename, linenum, ra);
+	xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair");
 }
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 10dce5475f02..079a367f44ee 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -145,10 +145,8 @@ extern int xfs_errortag_clearall(struct xfs_mount *mp, int loud);
 #endif /* DEBUG */

 /*
- * XFS panic tags -- allow a call to xfs_cmn_err() be turned into
- *			a panic by setting xfs_panic_mask in a
- *			sysctl.  update xfs_max[XFS_PARAM] if
- *			more are added.
+ * XFS panic tags -- allow a call to xfs_alert_tag() be turned into
+ *			a panic by setting xfs_panic_mask in a sysctl.
  */
 #define XFS_NO_PTAG			0
 #define XFS_PTAG_IFLUSH			0x00000001
@@ -160,17 +158,4 @@ extern int xfs_errortag_clearall(struct xfs_mount *mp, int loud);
 #define XFS_PTAG_SHUTDOWN_LOGERROR	0x00000040
 #define XFS_PTAG_FSBLOCK_ZERO		0x00000080

-struct xfs_mount;
-
-extern void xfs_hex_dump(void *p, int length);
-
-#define xfs_fs_repair_cmn_err(level, mp, fmt, args...) \
-	xfs_fs_cmn_err(level, mp, fmt "  Unmount and run xfs_repair.", ## args)
-
-#define xfs_fs_mount_cmn_err(f, fmt, args...) \
-	do { \
-		if (!(f & XFS_MFSI_QUIET)) \
-			cmn_err(CE_WARN, "XFS: " fmt, ## args); \
-	} while (0)
-
 #endif	/* __XFS_ERROR_H__ */
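The panic-tag comment above is the whole mechanism: each XFS_PTAG_* value is one bit, and when the matching bit is set in the xfs_panic_mask sysctl, a tagged alert escalates to a BUG(). A minimal sketch of the check inside xfs_alert_tag(), under the same message-helper assumptions as before (illustrative, not the verbatim kernel code):

	/* Sketch: escalate a tagged alert to a BUG() when the matching
	 * bit is set in the sysctl-controlled xfs_panic_mask. */
	void
	xfs_alert_tag(const struct xfs_mount *mp, int panic_tag,
		      const char *fmt, ...)
	{
		struct va_format	vaf;
		va_list			args;
		int			do_panic = 0;

		if (xfs_panic_mask && (xfs_panic_mask & panic_tag))
			do_panic = 1;	/* turn this alert into a BUG */

		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		printk(KERN_ALERT "XFS (%s): %pV\n", mp->m_fsname, &vaf);
		va_end(args);

		BUG_ON(do_panic);	/* fire only after the message is out */
	}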
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 85668efb3e3e..9153d2c77caf 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -385,8 +385,8 @@ xfs_growfs_data_private(
 				XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
 				XFS_FSS_TO_BB(mp, 1), 0, &bp);
 		if (error) {
-			xfs_fs_cmn_err(CE_WARN, mp,
+			xfs_warn(mp,
 		"error %d reading secondary superblock for ag %d",
 				error, agno);
 			break;
 		}
@@ -399,7 +399,7 @@ xfs_growfs_data_private(
 		if (!(error = xfs_bwrite(mp, bp))) {
 			continue;
 		} else {
-			xfs_fs_cmn_err(CE_WARN, mp,
+			xfs_warn(mp,
 		"write error %d updating secondary superblock for ag %d",
 				error, agno);
 			break; /* no point in continuing */
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 0626a32c3447..84ebeec16642 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -1055,28 +1055,23 @@ xfs_difree(
 	 */
 	agno = XFS_INO_TO_AGNO(mp, inode);
 	if (agno >= mp->m_sb.sb_agcount) {
-		cmn_err(CE_WARN,
-			"xfs_difree: agno >= mp->m_sb.sb_agcount (%d >= %d) on %s.  Returning EINVAL.",
-			agno, mp->m_sb.sb_agcount, mp->m_fsname);
+		xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).",
+			__func__, agno, mp->m_sb.sb_agcount);
 		ASSERT(0);
 		return XFS_ERROR(EINVAL);
 	}
 	agino = XFS_INO_TO_AGINO(mp, inode);
 	if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) {
-		cmn_err(CE_WARN,
-			"xfs_difree: inode != XFS_AGINO_TO_INO() "
-			"(%llu != %llu) on %s.  Returning EINVAL.",
-			(unsigned long long)inode,
-			(unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino),
-			mp->m_fsname);
+		xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).",
+			__func__, (unsigned long long)inode,
+			(unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino));
 		ASSERT(0);
 		return XFS_ERROR(EINVAL);
 	}
 	agbno = XFS_AGINO_TO_AGBNO(mp, agino);
 	if (agbno >= mp->m_sb.sb_agblocks) {
-		cmn_err(CE_WARN,
-			"xfs_difree: agbno >= mp->m_sb.sb_agblocks (%d >= %d) on %s.  Returning EINVAL.",
-			agbno, mp->m_sb.sb_agblocks, mp->m_fsname);
+		xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
+			__func__, agbno, mp->m_sb.sb_agblocks);
 		ASSERT(0);
 		return XFS_ERROR(EINVAL);
 	}
@@ -1085,9 +1080,8 @@ xfs_difree(
 	 */
 	error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
 	if (error) {
-		cmn_err(CE_WARN,
-			"xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s.  Returning error.",
-			error, mp->m_fsname);
+		xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.",
+			__func__, error);
 		return error;
 	}
 	agi = XFS_BUF_TO_AGI(agbp);
@@ -1106,17 +1100,15 @@ xfs_difree(
 	 * Look for the entry describing this inode.
 	 */
 	if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) {
-		cmn_err(CE_WARN,
-			"xfs_difree: xfs_inobt_lookup returned()  an error %d on %s.  Returning error.",
-			error, mp->m_fsname);
+		xfs_warn(mp, "%s: xfs_inobt_lookup() returned error %d.",
+			__func__, error);
 		goto error0;
 	}
 	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 	error = xfs_inobt_get_rec(cur, &rec, &i);
 	if (error) {
-		cmn_err(CE_WARN,
-			"xfs_difree: xfs_inobt_get_rec() returned an error %d on %s.  Returning error.",
-			error, mp->m_fsname);
+		xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.",
+			__func__, error);
 		goto error0;
 	}
 	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
@@ -1157,8 +1149,8 @@ xfs_difree(
 	xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));

 	if ((error = xfs_btree_delete(cur, &i))) {
-		cmn_err(CE_WARN, "xfs_difree: xfs_btree_delete returned an error %d on %s.\n",
-			error, mp->m_fsname);
+		xfs_warn(mp, "%s: xfs_btree_delete returned error %d.",
+			__func__, error);
 		goto error0;
 	}

@@ -1170,9 +1162,8 @@ xfs_difree(

 	error = xfs_inobt_update(cur, &rec);
 	if (error) {
-		cmn_err(CE_WARN,
-			"xfs_difree: xfs_inobt_update returned an error %d on %s.",
-			error, mp->m_fsname);
+		xfs_warn(mp, "%s: xfs_inobt_update returned error %d.",
+			__func__, error);
 		goto error0;
 	}

@@ -1218,10 +1209,9 @@ xfs_imap_lookup(

 	error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
 	if (error) {
-		xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
-				"xfs_ialloc_read_agi() returned "
-				"error %d, agno %d",
-				error, agno);
+		xfs_alert(mp,
+			"%s: xfs_ialloc_read_agi() returned error %d, agno %d",
+			__func__, error, agno);
 		return error;
 	}

@@ -1299,24 +1289,21 @@ xfs_imap(
 		if (flags & XFS_IGET_UNTRUSTED)
 			return XFS_ERROR(EINVAL);
 		if (agno >= mp->m_sb.sb_agcount) {
-			xfs_fs_cmn_err(CE_ALERT, mp,
-					"xfs_imap: agno (%d) >= "
-					"mp->m_sb.sb_agcount (%d)",
-					agno, mp->m_sb.sb_agcount);
+			xfs_alert(mp,
+				"%s: agno (%d) >= mp->m_sb.sb_agcount (%d)",
+				__func__, agno, mp->m_sb.sb_agcount);
 		}
 		if (agbno >= mp->m_sb.sb_agblocks) {
-			xfs_fs_cmn_err(CE_ALERT, mp,
-					"xfs_imap: agbno (0x%llx) >= "
-					"mp->m_sb.sb_agblocks (0x%lx)",
-					(unsigned long long) agbno,
-					(unsigned long) mp->m_sb.sb_agblocks);
+			xfs_alert(mp,
＋		"%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)",
+				__func__, (unsigned long long)agbno,
+				(unsigned long)mp->m_sb.sb_agblocks);
 		}
 		if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
-			xfs_fs_cmn_err(CE_ALERT, mp,
-					"xfs_imap: ino (0x%llx) != "
-					"XFS_AGINO_TO_INO(mp, agno, agino) "
-					"(0x%llx)",
-					ino, XFS_AGINO_TO_INO(mp, agno, agino));
+			xfs_alert(mp,
+		"%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)",
+				__func__, ino,
+				XFS_AGINO_TO_INO(mp, agno, agino));
 		}
 		xfs_stack_trace();
 #endif /* DEBUG */
@@ -1388,10 +1375,9 @@ out_map:
 	 */
 	if ((imap->im_blkno + imap->im_len) >
 	    XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
-		xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
-			"(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > "
-			" XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)",
-			(unsigned long long) imap->im_blkno,
+		xfs_alert(mp,
+	"%s: (im_blkno (0x%llx) + im_len (0x%llx)) > sb_dblocks (0x%llx)",
+			__func__, (unsigned long long) imap->im_blkno,
 			(unsigned long long) imap->im_len,
 			XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
 		return XFS_ERROR(EINVAL);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index be7cf625421f..a37480a6e023 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -110,8 +110,8 @@ xfs_inobp_check(
 		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
 					i * mp->m_sb.sb_inodesize);
 		if (!dip->di_next_unlinked)  {
-			xfs_fs_cmn_err(CE_ALERT, mp,
-				"Detected a bogus zero next_unlinked field in incore inode buffer 0x%p.  About to pop an ASSERT.",
+			xfs_alert(mp,
+	"Detected bogus zero next_unlinked field in incore inode buffer 0x%p.",
 				bp);
 			ASSERT(dip->di_next_unlinked);
 		}
@@ -142,10 +142,9 @@ xfs_imap_to_bp(
 				   (int)imap->im_len, buf_flags, &bp);
 	if (error) {
 		if (error != EAGAIN) {
-			cmn_err(CE_WARN,
-				"xfs_imap_to_bp: xfs_trans_read_buf()returned "
-				"an error %d on %s.  Returning error.",
-				error, mp->m_fsname);
+			xfs_warn(mp,
+				"%s: xfs_trans_read_buf() returned error %d.",
+				__func__, error);
 		} else {
 			ASSERT(buf_flags & XBF_TRYLOCK);
 		}
@@ -180,12 +179,11 @@ xfs_imap_to_bp(
 			XFS_CORRUPTION_ERROR("xfs_imap_to_bp",
 						XFS_ERRLEVEL_HIGH, mp, dip);
 #ifdef DEBUG
-			cmn_err(CE_PANIC,
-					"Device %s - bad inode magic/vsn "
-					"daddr %lld #%d (magic=%x)",
-				XFS_BUFTARG_NAME(mp->m_ddev_targp),
+			xfs_emerg(mp,
+				"bad inode magic/vsn daddr %lld #%d (magic=%x)",
 				(unsigned long long)imap->im_blkno, i,
 				be16_to_cpu(dip->di_magic));
+			ASSERT(0);
 #endif
 			xfs_trans_brelse(tp, bp);
 			return XFS_ERROR(EFSCORRUPTED);
@@ -317,7 +315,7 @@ xfs_iformat(
 	if (unlikely(be32_to_cpu(dip->di_nextents) +
 		     be16_to_cpu(dip->di_anextents) >
 		     be64_to_cpu(dip->di_nblocks))) {
-		xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
+		xfs_warn(ip->i_mount,
 			"corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
 			(unsigned long long)ip->i_ino,
 			(int)(be32_to_cpu(dip->di_nextents) +
@@ -330,8 +328,7 @@ xfs_iformat(
 	}

 	if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
-		xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-			"corrupt dinode %Lu, forkoff = 0x%x.",
+		xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.",
 			(unsigned long long)ip->i_ino,
 			dip->di_forkoff);
 		XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
@@ -341,7 +338,7 @@ xfs_iformat(

 	if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
 		     !ip->i_mount->m_rtdev_targp)) {
-		xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
+		xfs_warn(ip->i_mount,
 			"corrupt dinode %Lu, has realtime flag set.",
 			ip->i_ino);
 		XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
@@ -373,9 +370,8 @@ xfs_iformat(
 		 * no local regular files yet
 		 */
 		if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) {
-			xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-				"corrupt inode %Lu "
-				"(local format for regular file).",
+			xfs_warn(ip->i_mount,
+			"corrupt inode %Lu (local format for regular file).",
 				(unsigned long long) ip->i_ino);
 			XFS_CORRUPTION_ERROR("xfs_iformat(4)",
 					     XFS_ERRLEVEL_LOW,
@@ -385,9 +381,8 @@ xfs_iformat(

 		di_size = be64_to_cpu(dip->di_size);
 		if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
-			xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-				"corrupt inode %Lu "
-				"(bad size %Ld for local inode).",
+			xfs_warn(ip->i_mount,
+			"corrupt inode %Lu (bad size %Ld for local inode).",
 				(unsigned long long) ip->i_ino,
 				(long long) di_size);
 			XFS_CORRUPTION_ERROR("xfs_iformat(5)",
@@ -431,9 +426,8 @@ xfs_iformat(
 		size = be16_to_cpu(atp->hdr.totsize);

 		if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) {
-			xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-				"corrupt inode %Lu "
-				"(bad attr fork size %Ld).",
+			xfs_warn(ip->i_mount,
+				"corrupt inode %Lu (bad attr fork size %Ld).",
 				(unsigned long long) ip->i_ino,
 				(long long) size);
 			XFS_CORRUPTION_ERROR("xfs_iformat(8)",
@@ -488,9 +482,8 @@ xfs_iformat_local(
 	 * kmem_alloc() or memcpy() below.
 	 */
 	if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
-		xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-			"corrupt inode %Lu "
-			"(bad size %d for local fork, size = %d).",
+		xfs_warn(ip->i_mount,
+	"corrupt inode %Lu (bad size %d for local fork, size = %d).",
 			(unsigned long long) ip->i_ino, size,
 			XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
 		XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
@@ -547,8 +540,7 @@ xfs_iformat_extents(
 	 * kmem_alloc() or memcpy() below.
 	 */
 	if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
-		xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-			"corrupt inode %Lu ((a)extents = %d).",
+		xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
 			(unsigned long long) ip->i_ino, nex);
 		XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
 				     ip->i_mount, dip);
@@ -623,11 +615,10 @@ xfs_iformat_btree(
 	    || XFS_BMDR_SPACE_CALC(nrecs) >
 			XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)
 	    || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
-		xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-			"corrupt inode %Lu (btree).",
+		xfs_warn(ip->i_mount, "corrupt inode %Lu (btree).",
 			(unsigned long long) ip->i_ino);
-		XFS_ERROR_REPORT("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
-				 ip->i_mount);
+		XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
+				 ip->i_mount, dip);
 		return XFS_ERROR(EFSCORRUPTED);
 	}

@@ -813,11 +804,9 @@ xfs_iread(
 	 */
 	if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) {
 #ifdef DEBUG
-		xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: "
-				"dip->di_magic (0x%x) != "
-				"XFS_DINODE_MAGIC (0x%x)",
-				be16_to_cpu(dip->di_magic),
-				XFS_DINODE_MAGIC);
+		xfs_alert(mp,
+			"%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)",
+			__func__, be16_to_cpu(dip->di_magic), XFS_DINODE_MAGIC);
 #endif /* DEBUG */
 		error = XFS_ERROR(EINVAL);
 		goto out_brelse;
@@ -835,9 +824,8 @@ xfs_iread(
 	error = xfs_iformat(ip, dip);
 	if (error)  {
 #ifdef DEBUG
-		xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: "
-				"xfs_iformat() returned error %d",
-				error);
+		xfs_alert(mp, "%s: xfs_iformat() returned error %d",
+			__func__, error);
 #endif /* DEBUG */
 		goto out_brelse;
 	}
@@ -1016,8 +1004,8 @@ xfs_ialloc(
 	 * This is because we're setting fields here we need
 	 * to prevent others from looking at until we're done.
 	 */
-	error = xfs_trans_iget(tp->t_mountp, tp, ino,
-				XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip);
+	error = xfs_iget(tp->t_mountp, tp, ino, XFS_IGET_CREATE,
+			 XFS_ILOCK_EXCL, &ip);
 	if (error)
 		return error;
 	ASSERT(ip != NULL);
@@ -1166,6 +1154,7 @@ xfs_ialloc(
 	/*
 	 * Log the new values stuffed into the inode.
 	 */
+	xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
 	xfs_trans_log_inode(tp, ip, flags);

 	/* now that we have an i_mode we can setup inode ops and unlock */
@@ -1820,9 +1809,8 @@ xfs_iunlink_remove(
 		 */
 		error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
 		if (error) {
-			cmn_err(CE_WARN,
-				"xfs_iunlink_remove: xfs_itobp() returned an error %d on %s.  Returning error.",
-				error, mp->m_fsname);
+			xfs_warn(mp, "%s: xfs_itobp() returned error %d.",
+				__func__, error);
 			return error;
 		}
 		next_agino = be32_to_cpu(dip->di_next_unlinked);
@@ -1867,9 +1855,9 @@ xfs_iunlink_remove(
 			error = xfs_inotobp(mp, tp, next_ino, &last_dip,
 					    &last_ibp, &last_offset, 0);
 			if (error) {
-				cmn_err(CE_WARN,
-			"xfs_iunlink_remove: xfs_inotobp() returned an error %d on %s.  Returning error.",
-					error, mp->m_fsname);
+				xfs_warn(mp,
+					"%s: xfs_inotobp() returned error %d.",
+					__func__, error);
 				return error;
 			}
 			next_agino = be32_to_cpu(last_dip->di_next_unlinked);
@@ -1882,9 +1870,8 @@ xfs_iunlink_remove(
 		 */
 		error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
 		if (error) {
-			cmn_err(CE_WARN,
-				"xfs_iunlink_remove: xfs_itobp() returned an error %d on %s.  Returning error.",
-				error, mp->m_fsname);
+			xfs_warn(mp, "%s: xfs_itobp(2) returned error %d.",
+				__func__, error);
 			return error;
 		}
 		next_agino = be32_to_cpu(dip->di_next_unlinked);
@@ -2802,7 +2789,7 @@ xfs_iflush(

 	/*
 	 * We can't flush the inode until it is unpinned, so wait for it if we
-	 * are allowed to block.  We know noone new can pin it, because we are
+	 * are allowed to block.  We know no one new can pin it, because we are
 	 * holding the inode lock shared and you need to hold it exclusively to
 	 * pin the inode.
 	 *
@@ -2848,7 +2835,7 @@ xfs_iflush(
 	 * Get the buffer containing the on-disk inode.
 	 */
 	error = xfs_itobp(mp, NULL, ip, &dip, &bp,
-				(flags & SYNC_WAIT) ? XBF_LOCK : XBF_TRYLOCK);
+				(flags & SYNC_TRYLOCK) ? XBF_TRYLOCK : XBF_LOCK);
 	if (error || !bp) {
 		xfs_ifunlock(ip);
 		return error;
@@ -2939,16 +2926,16 @@ xfs_iflush_int(

 	if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC,
 			       mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
-		xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
-		    "xfs_iflush: Bad inode %Lu magic number 0x%x, ptr 0x%p",
-			ip->i_ino, be16_to_cpu(dip->di_magic), dip);
+		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
+			"%s: Bad inode %Lu magic number 0x%x, ptr 0x%p",
+			__func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
 		goto corrupt_out;
 	}
 	if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC,
 				mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) {
-		xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
-			"xfs_iflush: Bad inode %Lu, ptr 0x%p, magic number 0x%x",
-			ip->i_ino, ip, ip->i_d.di_magic);
+		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
+			"%s: Bad inode %Lu, ptr 0x%p, magic number 0x%x",
+			__func__, ip->i_ino, ip, ip->i_d.di_magic);
 		goto corrupt_out;
 	}
 	if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
@@ -2956,9 +2943,9 @@ xfs_iflush_int(
 		    (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
 		    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
 		    mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) {
-			xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
-				"xfs_iflush: Bad regular inode %Lu, ptr 0x%p",
-				ip->i_ino, ip);
+			xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
+				"%s: Bad regular inode %Lu, ptr 0x%p",
+				__func__, ip->i_ino, ip);
 			goto corrupt_out;
 		}
 	} else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
@@ -2967,28 +2954,28 @@ xfs_iflush_int(
 		    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
 		    (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL),
 		    mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) {
-			xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
-				"xfs_iflush: Bad directory inode %Lu, ptr 0x%p",
-				ip->i_ino, ip);
+			xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
+				"%s: Bad directory inode %Lu, ptr 0x%p",
+				__func__, ip->i_ino, ip);
 			goto corrupt_out;
 		}
 	}
 	if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents >
 				ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5,
 				XFS_RANDOM_IFLUSH_5)) {
-		xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
-			"xfs_iflush: detected corrupt incore inode %Lu, total extents = %d, nblocks = %Ld, ptr 0x%p",
-			ip->i_ino,
+		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
+			"%s: detected corrupt incore inode %Lu, "
+			"total extents = %d, nblocks = %Ld, ptr 0x%p",
+			__func__, ip->i_ino,
 			ip->i_d.di_nextents + ip->i_d.di_anextents,
-			ip->i_d.di_nblocks,
-			ip);
+			ip->i_d.di_nblocks, ip);
 		goto corrupt_out;
 	}
 	if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize,
 				mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) {
-		xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
-			"xfs_iflush: bad inode %Lu, forkoff 0x%x, ptr 0x%p",
-			ip->i_ino, ip->i_d.di_forkoff, ip);
+		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
+			"%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p",
+			__func__, ip->i_ino, ip->i_d.di_forkoff, ip);
 		goto corrupt_out;
 	}
 	/*
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 5c95fa8ec11d..ff4e2a30227d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -111,7 +111,7 @@ struct xfs_imap {
  * Generally, we do not want to hold the i_rlock while holding the
  * i_ilock. Hierarchy is i_iolock followed by i_rlock.
  *
- * xfs_iptr_t contains all the inode fields upto and including the
+ * xfs_iptr_t contains all the inode fields up to and including the
  * i_mnext and i_mprev fields, it is used as a marker in the inode
  * chain off the mount structure by xfs_sync calls.
  */
@@ -336,7 +336,7 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)

 /*
  * Project quota id helpers (previously projid was 16bit only
- * and using two 16bit values to hold new 32bit projid was choosen
+ * and using two 16bit values to hold new 32bit projid was chosen
  * to retain compatibility with "old" filesystems).
  */
 static inline prid_t
@@ -409,28 +409,35 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
 /*
  * Flags for lockdep annotations.
  *
- * XFS_I[O]LOCK_PARENT - for operations that require locking two inodes
- * (ie directory operations that require locking a directory inode and
- * an entry inode).  The first inode gets locked with this flag so it
- * gets a lockdep subclass of 1 and the second lock will have a lockdep
- * subclass of 0.
+ * XFS_LOCK_PARENT - for directory operations that require locking a
+ * parent directory inode and a child entry inode.  The parent gets locked
+ * with this flag so it gets a lockdep subclass of 1 and the child entry
+ * lock will have a lockdep subclass of 0.
+ *
+ * XFS_LOCK_RTBITMAP/XFS_LOCK_RTSUM - the realtime device bitmap and summary
+ * inodes do not participate in the normal lock order, and thus have their
+ * own subclasses.
  *
  * XFS_LOCK_INUMORDER - for locking several inodes at the some time
  * with xfs_lock_inodes().  This flag is used as the starting subclass
  * and each subsequent lock acquired will increment the subclass by one.
- * So the first lock acquired will have a lockdep subclass of 2, the
- * second lock will have a lockdep subclass of 3, and so on. It is
+ * So the first lock acquired will have a lockdep subclass of 4, the
+ * second lock will have a lockdep subclass of 5, and so on. It is
 * the responsibility of the class builder to shift this to the correct
 * portion of the lock_mode lockdep mask.
 */
 #define XFS_LOCK_PARENT		1
-#define XFS_LOCK_INUMORDER	2
+#define XFS_LOCK_RTBITMAP	2
+#define XFS_LOCK_RTSUM		3
+#define XFS_LOCK_INUMORDER	4

 #define XFS_IOLOCK_SHIFT	16
 #define XFS_IOLOCK_PARENT	(XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)

 #define XFS_ILOCK_SHIFT		24
 #define XFS_ILOCK_PARENT	(XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
+#define XFS_ILOCK_RTBITMAP	(XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
+#define XFS_ILOCK_RTSUM	(XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)

 #define XFS_IOLOCK_DEP_MASK	0x00ff0000
 #define XFS_ILOCK_DEP_MASK	0xff000000
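To make the new subclass layout concrete: each lock class owns one byte of the lock_mode word (ILOCK bits 24-31 per XFS_ILOCK_DEP_MASK, IOLOCK bits 16-23 per XFS_IOLOCK_DEP_MASK), and the subclass constants above are shifted into that byte. A small worked example, derived directly from the #defines in this hunk (userspace demo, not kernel code):

	#include <stdio.h>

	#define XFS_LOCK_PARENT		1
	#define XFS_LOCK_RTBITMAP	2
	#define XFS_LOCK_RTSUM		3
	#define XFS_LOCK_INUMORDER	4

	#define XFS_IOLOCK_SHIFT	16
	#define XFS_ILOCK_SHIFT		24

	int main(void)
	{
		/* subclass 1 placed in the ILOCK byte -> 0x01000000 */
		printf("XFS_ILOCK_PARENT   = 0x%08x\n",
		       XFS_LOCK_PARENT << XFS_ILOCK_SHIFT);
		/* realtime bitmap/summary inodes get dedicated subclasses */
		printf("XFS_ILOCK_RTBITMAP = 0x%08x\n",
		       XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT);
		printf("XFS_ILOCK_RTSUM    = 0x%08x\n",
		       XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT);
		/* inode-order locking starts at subclass 4 and counts up,
		 * so it never collides with the reserved subclasses above */
		printf("first INUMORDER ilock = 0x%08x\n",
		       XFS_LOCK_INUMORDER << XFS_ILOCK_SHIFT);
		return 0;
	}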
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index fd4f398bd6f1..576fdfe81d60 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -198,6 +198,41 @@ xfs_inode_item_size(
 }

 /*
+ * xfs_inode_item_format_extents - convert in-core extents to on-disk form
+ *
+ * For either the data or attr fork in extent format, we need to endian convert
+ * the in-core extent as we place them into the on-disk inode. In this case, we
+ * need to do this conversion before we write the extents into the log. Because
+ * we don't have the disk inode to write into here, we allocate a buffer and
+ * format the extents into it via xfs_iextents_copy(). We free the buffer in
+ * the unlock routine after the copy for the log has been made.
+ *
+ * In the case of the data fork, the in-core and on-disk fork sizes can be
+ * different due to delayed allocation extents. We only log on-disk extents
+ * here, so always use the physical fork size to determine the size of the
+ * buffer we need to allocate.
+ */
+STATIC void
+xfs_inode_item_format_extents(
+	struct xfs_inode	*ip,
+	struct xfs_log_iovec	*vecp,
+	int			whichfork,
+	int			type)
+{
+	xfs_bmbt_rec_t		*ext_buffer;
+
+	ext_buffer = kmem_alloc(XFS_IFORK_SIZE(ip, whichfork), KM_SLEEP);
+	if (whichfork == XFS_DATA_FORK)
+		ip->i_itemp->ili_extents_buf = ext_buffer;
+	else
+		ip->i_itemp->ili_aextents_buf = ext_buffer;
+
+	vecp->i_addr = ext_buffer;
+	vecp->i_len = xfs_iextents_copy(ip, ext_buffer, whichfork);
+	vecp->i_type = type;
+}
+
+/*
  * This is called to fill in the vector of log iovecs for the
  * given inode log item.  It fills the first item with an inode
  * log format structure, the second with the on-disk inode structure,
@@ -213,7 +248,6 @@ xfs_inode_item_format(
 	struct xfs_inode	*ip = iip->ili_inode;
 	uint			nvecs;
 	size_t			data_bytes;
-	xfs_bmbt_rec_t		*ext_buffer;
 	xfs_mount_t		*mp;

 	vecp->i_addr = &iip->ili_format;
@@ -320,22 +354,8 @@ xfs_inode_item_format(
 			} else
 #endif
 			{
-				/*
-				 * There are delayed allocation extents
-				 * in the inode, or we need to convert
-				 * the extents to on disk format.
-				 * Use xfs_iextents_copy()
-				 * to copy only the real extents into
-				 * a separate buffer.  We'll free the
-				 * buffer in the unlock routine.
-				 */
-				ext_buffer = kmem_alloc(ip->i_df.if_bytes,
-					KM_SLEEP);
-				iip->ili_extents_buf = ext_buffer;
-				vecp->i_addr = ext_buffer;
-				vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
-					XFS_DATA_FORK);
-				vecp->i_type = XLOG_REG_TYPE_IEXT;
+				xfs_inode_item_format_extents(ip, vecp,
+					XFS_DATA_FORK, XLOG_REG_TYPE_IEXT);
 			}
 			ASSERT(vecp->i_len <= ip->i_df.if_bytes);
 			iip->ili_format.ilf_dsize = vecp->i_len;
@@ -445,19 +465,12 @@ xfs_inode_item_format(
 			 */
 			vecp->i_addr = ip->i_afp->if_u1.if_extents;
 			vecp->i_len = ip->i_afp->if_bytes;
+			vecp->i_type = XLOG_REG_TYPE_IATTR_EXT;
 #else
 			ASSERT(iip->ili_aextents_buf == NULL);
-			/*
-			 * Need to endian flip before logging
-			 */
-			ext_buffer = kmem_alloc(ip->i_afp->if_bytes,
-				KM_SLEEP);
-			iip->ili_aextents_buf = ext_buffer;
-			vecp->i_addr = ext_buffer;
-			vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
-					XFS_ATTR_FORK);
+			xfs_inode_item_format_extents(ip, vecp,
+					XFS_ATTR_FORK, XLOG_REG_TYPE_IATTR_EXT);
 #endif
-			vecp->i_type = XLOG_REG_TYPE_IATTR_EXT;
 			iip->ili_format.ilf_asize = vecp->i_len;
 			vecp++;
 			nvecs++;
@@ -760,11 +773,11 @@ xfs_inode_item_push(
 	 * Push the inode to it's backing buffer. This will not remove the
 	 * inode from the AIL - a further push will be required to trigger a
 	 * buffer push. However, this allows all the dirty inodes to be pushed
-	 * to the buffer before it is pushed to disk. THe buffer IO completion
-	 * will pull th einode from the AIL, mark it clean and unlock the flush
+	 * to the buffer before it is pushed to disk. The buffer IO completion
+	 * will pull the inode from the AIL, mark it clean and unlock the flush
	 * lock.
	 */
-	(void) xfs_iflush(ip, 0);
+	(void) xfs_iflush(ip, SYNC_TRYLOCK);
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
 }

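The helper added above exists because in-core extent records are kept in host byte order (and the data fork may hold delayed-allocation extents that must never reach the log), while the logged copy must be in on-disk big-endian format. A reduced, hedged sketch of the conversion loop that xfs_iextents_copy() performs for this helper (the function name here is hypothetical; the real routine also validates each extent):

	/* Sketch: copy in-core extent records into a log buffer in
	 * on-disk (big-endian) order, skipping delalloc extents. */
	STATIC int
	sketch_iextents_copy(xfs_inode_t *ip, xfs_bmbt_rec_t *dp, int whichfork)
	{
		xfs_ifork_t	*ifp = XFS_IFORK_PTR(ip, whichfork);
		int		nrecs = ifp->if_bytes /
					(uint)sizeof(xfs_bmbt_rec_t);
		int		copied = 0;
		int		i;

		for (i = 0; i < nrecs; i++) {
			xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);

			if (isnullstartblock(xfs_bmbt_get_startblock(ep)))
				continue;	/* delalloc: not logged */
			dp->l0 = cpu_to_be64(ep->l0);	/* endian flip */
			dp->l1 = cpu_to_be64(ep->l1);
			dp++;
			copied++;
		}
		return copied * (uint)sizeof(xfs_bmbt_rec_t);
	}

This is also why the helper sizes its buffer from the physical fork: the delalloc extents counted in the in-core fork are filtered out during the copy.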
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 8a0f044750c3..091d82b94c4d 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -101,11 +101,11 @@ xfs_iomap_eof_align_last_fsb(
 }

 STATIC int
-xfs_cmn_err_fsblock_zero(
+xfs_alert_fsblock_zero(
 	xfs_inode_t	*ip,
 	xfs_bmbt_irec_t	*imap)
 {
-	xfs_cmn_err(XFS_PTAG_FSBLOCK_ZERO, CE_ALERT, ip->i_mount,
+	xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
 			"Access to block zero in inode %llu "
 			"start_block: %llx start_off: %llx "
 			"blkcnt: %llx extent-state: %x\n",
@@ -246,7 +246,7 @@ xfs_iomap_write_direct(
 	}

 	if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip))) {
-		error = xfs_cmn_err_fsblock_zero(ip, imap);
+		error = xfs_alert_fsblock_zero(ip, imap);
 		goto error_out;
 	}

@@ -464,7 +464,7 @@ retry:
 	}

 	if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip)))
-		return xfs_cmn_err_fsblock_zero(ip, &imap[0]);
+		return xfs_alert_fsblock_zero(ip, &imap[0]);

 	*ret_imap = imap[0];
 	return 0;
@@ -614,7 +614,7 @@ xfs_iomap_write_allocate(
 	 * covers at least part of the callers request
 	 */
 	if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip)))
-		return xfs_cmn_err_fsblock_zero(ip, imap);
+		return xfs_alert_fsblock_zero(ip, imap);

 	if ((offset_fsb >= imap->br_startoff) &&
 	    (offset_fsb < (imap->br_startoff +
@@ -724,7 +724,7 @@ xfs_iomap_write_unwritten(
 			return XFS_ERROR(error);

 		if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
-			return xfs_cmn_err_fsblock_zero(ip, &imap);
+			return xfs_alert_fsblock_zero(ip, &imap);

 		if ((numblks_fsb = imap.br_blockcount) == 0) {
 			/*
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index dc1882adaf54..751e94fe1f77 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -204,7 +204,6 @@ xfs_bulkstat(
 	xfs_agi_t		*agi;	/* agi header data */
 	xfs_agino_t		agino;	/* inode # in allocation group */
 	xfs_agnumber_t		agno;	/* allocation group number */
-	xfs_daddr_t		bno;	/* inode cluster start daddr */
 	int			chunkidx; /* current index into inode chunk */
 	int			clustidx; /* current index into inode cluster */
 	xfs_btree_cur_t		*cur;	/* btree cursor for ialloc btree */
@@ -463,7 +462,6 @@ xfs_bulkstat(
 						mp->m_sb.sb_inopblog);
 				}
 				ino = XFS_AGINO_TO_INO(mp, agno, agino);
-				bno = XFS_AGB_TO_DADDR(mp, agno, agbno);
 				/*
 				 * Skip if this inode is free.
 				 */
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index ae6fef1ff563..b612ce4520ae 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -374,11 +374,10 @@ xfs_log_mount(
374 int error; 374 int error;
375 375
376 if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) 376 if (!(mp->m_flags & XFS_MOUNT_NORECOVERY))
377 cmn_err(CE_NOTE, "XFS mounting filesystem %s", mp->m_fsname); 377 xfs_notice(mp, "Mounting Filesystem");
378 else { 378 else {
379 cmn_err(CE_NOTE, 379 xfs_notice(mp,
380 "Mounting filesystem \"%s\" in no-recovery mode. Filesystem will be inconsistent.", 380"Mounting filesystem in no-recovery mode. Filesystem will be inconsistent.");
381 mp->m_fsname);
382 ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); 381 ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
383 } 382 }
384 383
@@ -393,7 +392,7 @@ xfs_log_mount(
393 */ 392 */
394 error = xfs_trans_ail_init(mp); 393 error = xfs_trans_ail_init(mp);
395 if (error) { 394 if (error) {
396 cmn_err(CE_WARN, "XFS: AIL initialisation failed: error %d", error); 395 xfs_warn(mp, "AIL initialisation failed: error %d", error);
397 goto out_free_log; 396 goto out_free_log;
398 } 397 }
399 mp->m_log->l_ailp = mp->m_ail; 398 mp->m_log->l_ailp = mp->m_ail;
@@ -413,7 +412,8 @@ xfs_log_mount(
413 if (readonly) 412 if (readonly)
414 mp->m_flags |= XFS_MOUNT_RDONLY; 413 mp->m_flags |= XFS_MOUNT_RDONLY;
415 if (error) { 414 if (error) {
416 cmn_err(CE_WARN, "XFS: log mount/recovery failed: error %d", error); 415 xfs_warn(mp, "log mount/recovery failed: error %d",
416 error);
417 goto out_destroy_ail; 417 goto out_destroy_ail;
418 } 418 }
419 } 419 }
@@ -542,10 +542,8 @@ xfs_log_unmount_write(xfs_mount_t *mp)
542 */ 542 */
543 } 543 }
544 544
545 if (error) { 545 if (error)
546 xfs_fs_cmn_err(CE_ALERT, mp, 546 xfs_alert(mp, "%s: unmount record failed", __func__);
547 "xfs_log_unmount: unmount record failed");
548 }
549 547
550 548
551 spin_lock(&log->l_icloglock); 549 spin_lock(&log->l_icloglock);
@@ -763,7 +761,7 @@ xfs_log_need_covered(xfs_mount_t *mp)
763 break; 761 break;
764 case XLOG_STATE_COVER_NEED: 762 case XLOG_STATE_COVER_NEED:
765 case XLOG_STATE_COVER_NEED2: 763 case XLOG_STATE_COVER_NEED2:
766 if (!xfs_trans_ail_tail(log->l_ailp) && 764 if (!xfs_ail_min_lsn(log->l_ailp) &&
767 xlog_iclogs_empty(log)) { 765 xlog_iclogs_empty(log)) {
768 if (log->l_covered_state == XLOG_STATE_COVER_NEED) 766 if (log->l_covered_state == XLOG_STATE_COVER_NEED)
769 log->l_covered_state = XLOG_STATE_COVER_DONE; 767 log->l_covered_state = XLOG_STATE_COVER_DONE;
@@ -803,7 +801,7 @@ xlog_assign_tail_lsn(
803 xfs_lsn_t tail_lsn; 801 xfs_lsn_t tail_lsn;
804 struct log *log = mp->m_log; 802 struct log *log = mp->m_log;
805 803
806 tail_lsn = xfs_trans_ail_tail(mp->m_ail); 804 tail_lsn = xfs_ail_min_lsn(mp->m_ail);
807 if (!tail_lsn) 805 if (!tail_lsn)
808 tail_lsn = atomic64_read(&log->l_last_sync_lsn); 806 tail_lsn = atomic64_read(&log->l_last_sync_lsn);
809 807
@@ -852,7 +850,7 @@ xlog_space_left(
	 * In this case we just want to return the size of the
	 * log as the amount of space left.
	 */
-	xfs_fs_cmn_err(CE_ALERT, log->l_mp,
+	xfs_alert(log->l_mp,
		"xlog_space_left: head behind tail\n"
		"  tail_cycle = %d, tail_bytes = %d\n"
		"  GH cycle = %d, GH bytes = %d",
@@ -1001,7 +999,7 @@ xlog_alloc_log(xfs_mount_t	*mp,
 
 	log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL);
 	if (!log) {
-		xlog_warn("XFS: Log allocation failed: No memory!");
+		xfs_warn(mp, "Log allocation failed: No memory!");
 		goto out;
 	}
 
@@ -1029,24 +1027,24 @@ xlog_alloc_log(xfs_mount_t	*mp,
 	if (xfs_sb_version_hassector(&mp->m_sb)) {
 		log2_size = mp->m_sb.sb_logsectlog;
 		if (log2_size < BBSHIFT) {
-			xlog_warn("XFS: Log sector size too small "
-				"(0x%x < 0x%x)", log2_size, BBSHIFT);
+			xfs_warn(mp, "Log sector size too small (0x%x < 0x%x)",
+				log2_size, BBSHIFT);
 			goto out_free_log;
 		}
 
 		log2_size -= BBSHIFT;
 		if (log2_size > mp->m_sectbb_log) {
-			xlog_warn("XFS: Log sector size too large "
-				"(0x%x > 0x%x)", log2_size, mp->m_sectbb_log);
+			xfs_warn(mp, "Log sector size too large (0x%x > 0x%x)",
+				log2_size, mp->m_sectbb_log);
 			goto out_free_log;
 		}
 
 		/* for larger sector sizes, must have v2 or external log */
 		if (log2_size && log->l_logBBstart > 0 &&
 		    !xfs_sb_version_haslogv2(&mp->m_sb)) {
-
-			xlog_warn("XFS: log sector size (0x%x) invalid "
-				"for configuration.", log2_size);
+			xfs_warn(mp,
+				"log sector size (0x%x) invalid for configuration.",
+				log2_size);
 			goto out_free_log;
 		}
 	}
@@ -1241,7 +1239,7 @@ xlog_grant_push_ail(
 	 * the filesystem is shutting down.
 	 */
 	if (!XLOG_FORCED_SHUTDOWN(log))
-		xfs_trans_ail_push(log->l_ailp, threshold_lsn);
+		xfs_ail_push(log->l_ailp, threshold_lsn);
 }
 
 /*
@@ -1563,38 +1561,36 @@ xlog_print_tic_res(
 	    "SWAPEXT"
 	};
 
-	xfs_fs_cmn_err(CE_WARN, mp,
+	xfs_warn(mp,
		"xfs_log_write: reservation summary:\n"
		" trans type = %s (%u)\n"
		" unit res = %d bytes\n"
		" current res = %d bytes\n"
		" total reg = %u bytes (o/flow = %u bytes)\n"
		" ophdrs = %u (ophdr space = %u bytes)\n"
		" ophdr + reg = %u bytes\n"
		" num regions = %u\n",
		((ticket->t_trans_type <= 0 ||
		  ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ?
		  "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]),
		ticket->t_trans_type,
		ticket->t_unit_res,
		ticket->t_curr_res,
		ticket->t_res_arr_sum, ticket->t_res_o_flow,
		ticket->t_res_num_ophdrs, ophdr_spc,
		ticket->t_res_arr_sum +
		ticket->t_res_o_flow + ophdr_spc,
		ticket->t_res_num);
 
 	for (i = 0; i < ticket->t_res_num; i++) {
 		uint r_type = ticket->t_res_arr[i].r_type;
-		cmn_err(CE_WARN,
-			    "region[%u]: %s - %u bytes\n",
-			    i,
+		xfs_warn(mp, "region[%u]: %s - %u bytes\n", i,
			    ((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ?
			    "bad-rtype" : res_type_str[r_type-1]),
			    ticket->t_res_arr[i].r_len);
 	}
 
-	xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, mp,
+	xfs_alert_tag(mp, XFS_PTAG_LOGRES,
		"xfs_log_write: reservation ran out. Need to up reservation");
 	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 }
@@ -1682,7 +1678,7 @@ xlog_write_setup_ophdr(
 	case XFS_LOG:
 		break;
 	default:
-		xfs_fs_cmn_err(CE_WARN, log->l_mp,
+		xfs_warn(log->l_mp,
			"Bad XFS transaction clientid 0x%x in ticket 0x%p",
			ophdr->oh_clientid, ticket);
 		return NULL;
@@ -2264,7 +2260,7 @@ xlog_state_do_callback(
 			if (repeats > 5000) {
 				flushcnt += repeats;
 				repeats = 0;
-				xfs_fs_cmn_err(CE_WARN, log->l_mp,
+				xfs_warn(log->l_mp,
					"%s: possible infinite loop (%d iterations)",
					__func__, flushcnt);
 			}
@@ -3052,10 +3048,8 @@ xfs_log_force(
 	int	error;
 
 	error = _xfs_log_force(mp, flags, NULL);
-	if (error) {
-		xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
-			"error %d returned.", error);
-	}
+	if (error)
+		xfs_warn(mp, "%s: error %d returned.", __func__, error);
 }
 
 /*
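The force-path conversion above also switches the message prefix from a hard-coded function name to __func__, so the text can never drift out of step with a rename, and drops the now-superfluous braces around a single-statement branch. A self-contained sketch of the idiom (do_force and force_and_log are hypothetical stand-ins, not XFS functions):

#include <stdio.h>

/* hypothetical operation that reports failure by return code */
static int do_force(void)
{
	return 5;	/* pretend errno-style failure */
}

static void force_and_log(void)
{
	int error = do_force();

	if (error)	/* single statement, so no braces needed */
		fprintf(stderr, "%s: error %d returned.\n", __func__, error);
}

int main(void)
{
	force_and_log();	/* prints "force_and_log: error 5 returned." */
	return 0;
}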
@@ -3204,10 +3198,8 @@ xfs_log_force_lsn(
 	int	error;
 
 	error = _xfs_log_force_lsn(mp, lsn, flags, NULL);
-	if (error) {
-		xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
-			"error %d returned.", error);
-	}
+	if (error)
+		xfs_warn(mp, "%s: error %d returned.", __func__, error);
 }
 
 /*
@@ -3412,9 +3404,20 @@ xlog_verify_dest_ptr(
 	}
 
 	if (!good_ptr)
-		xlog_panic("xlog_verify_dest_ptr: invalid ptr");
+		xfs_emerg(log->l_mp, "%s: invalid ptr", __func__);
 }
 
+/*
+ * Check to make sure the grant write head didn't just overlap the tail. If
+ * the cycles are the same, we can't be overlapping. Otherwise, make sure that
+ * the cycles differ by exactly one and check the byte count.
+ *
+ * This check is run unlocked, so can give false positives. Rather than assert
+ * on failures, use a warn-once flag and a panic tag to allow the admin to
+ * determine if they want to panic the machine when such an error occurs. For
+ * debug kernels this will have the same effect as using an assert but, unlike
+ * an assert, it can be turned off at runtime.
+ */
 STATIC void
 xlog_verify_grant_tail(
 	struct log	*log)
@@ -3422,17 +3425,22 @@ xlog_verify_grant_tail(
 	int		tail_cycle, tail_blocks;
 	int		cycle, space;
 
-	/*
-	 * Check to make sure the grant write head didn't just over lap the
-	 * tail. If the cycles are the same, we can't be overlapping.
-	 * Otherwise, make sure that the cycles differ by exactly one and
-	 * check the byte count.
-	 */
 	xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space);
 	xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks);
 	if (tail_cycle != cycle) {
-		ASSERT(cycle - 1 == tail_cycle);
-		ASSERT(space <= BBTOB(tail_blocks));
+		if (cycle - 1 != tail_cycle &&
+		    !(log->l_flags & XLOG_TAIL_WARN)) {
+			xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
+				"%s: cycle - 1 != tail_cycle", __func__);
+			log->l_flags |= XLOG_TAIL_WARN;
+		}
+
+		if (space > BBTOB(tail_blocks) &&
+		    !(log->l_flags & XLOG_TAIL_WARN)) {
+			xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
+				"%s: space > BBTOB(tail_blocks)", __func__);
+			log->l_flags |= XLOG_TAIL_WARN;
+		}
 	}
 }
 
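The rewritten check replaces the two ASSERTs with alert-once logic: each suspect condition raises a tagged alert the first time it fires, then latches XLOG_TAIL_WARN in log->l_flags so an unlocked false positive cannot flood the console. A minimal userspace sketch of the latch idiom (demo_log, warn_once and TAIL_WARNED are illustrative names only, not the kernel API):

#include <stdio.h>

#define TAIL_WARNED	0x10	/* latch bit: warning already issued */

struct demo_log {
	unsigned int	flags;
};

/* Warn on a suspect condition, but only the first time it is seen. */
static void warn_once(struct demo_log *log, int bad, const char *msg)
{
	if (bad && !(log->flags & TAIL_WARNED)) {
		fprintf(stderr, "ALERT: %s\n", msg);
		log->flags |= TAIL_WARNED;	/* suppress repeats */
	}
}

int main(void)
{
	struct demo_log log = { 0 };

	warn_once(&log, 1, "grant head overlapped tail");	/* prints */
	warn_once(&log, 1, "grant head overlapped tail");	/* silent */
	return 0;
}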
@@ -3448,16 +3456,16 @@ xlog_verify_tail_lsn(xlog_t *log,
 	    blocks =
		log->l_logBBsize - (log->l_prev_block - BLOCK_LSN(tail_lsn));
 	    if (blocks < BTOBB(iclog->ic_offset)+BTOBB(log->l_iclog_hsize))
-		xlog_panic("xlog_verify_tail_lsn: ran out of log space");
+		xfs_emerg(log->l_mp, "%s: ran out of log space", __func__);
 	} else {
 	    ASSERT(CYCLE_LSN(tail_lsn)+1 == log->l_prev_cycle);
 
 	    if (BLOCK_LSN(tail_lsn) == log->l_prev_block)
-		xlog_panic("xlog_verify_tail_lsn: tail wrapped");
+		xfs_emerg(log->l_mp, "%s: tail wrapped", __func__);
 
 	    blocks = BLOCK_LSN(tail_lsn) - log->l_prev_block;
 	    if (blocks < BTOBB(iclog->ic_offset) + 1)
-		xlog_panic("xlog_verify_tail_lsn: ran out of log space");
+		xfs_emerg(log->l_mp, "%s: ran out of log space", __func__);
 	}
 }	/* xlog_verify_tail_lsn */
 
@@ -3497,22 +3505,23 @@ xlog_verify_iclog(xlog_t *log,
 	icptr = log->l_iclog;
 	for (i=0; i < log->l_iclog_bufs; i++) {
 		if (icptr == NULL)
-			xlog_panic("xlog_verify_iclog: invalid ptr");
+			xfs_emerg(log->l_mp, "%s: invalid ptr", __func__);
 		icptr = icptr->ic_next;
 	}
 	if (icptr != log->l_iclog)
-		xlog_panic("xlog_verify_iclog: corrupt iclog ring");
+		xfs_emerg(log->l_mp, "%s: corrupt iclog ring", __func__);
 	spin_unlock(&log->l_icloglock);
 
 	/* check log magic numbers */
 	if (be32_to_cpu(iclog->ic_header.h_magicno) != XLOG_HEADER_MAGIC_NUM)
-		xlog_panic("xlog_verify_iclog: invalid magic num");
+		xfs_emerg(log->l_mp, "%s: invalid magic num", __func__);
 
 	ptr = (xfs_caddr_t) &iclog->ic_header;
 	for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count;
	     ptr += BBSIZE) {
 		if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM)
-			xlog_panic("xlog_verify_iclog: unexpected magic num");
+			xfs_emerg(log->l_mp, "%s: unexpected magic num",
+				__func__);
 	}
 
 	/* check fields */
@@ -3542,9 +3551,10 @@ xlog_verify_iclog(xlog_t *log,
 			}
 		}
 		if (clientid != XFS_TRANSACTION && clientid != XFS_LOG)
-			cmn_err(CE_WARN, "xlog_verify_iclog: "
-				"invalid clientid %d op 0x%p offset 0x%lx",
-				clientid, ophead, (unsigned long)field_offset);
+			xfs_warn(log->l_mp,
				"%s: invalid clientid %d op 0x%p offset 0x%lx",
				__func__, clientid, ophead,
				(unsigned long)field_offset);
 
 		/* check length */
 		field_offset = (__psint_t)
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index d5f8be8f4bf6..5864850e9e34 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -87,10 +87,6 @@ static inline uint xlog_get_client_id(__be32 i)
 	return be32_to_cpu(i) >> 24;
 }
 
-#define xlog_panic(args...)	cmn_err(CE_PANIC, ## args)
-#define xlog_exit(args...)	cmn_err(CE_PANIC, ## args)
-#define xlog_warn(args...)	cmn_err(CE_WARN, ## args)
-
 /*
  * In core log state
  */
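Removing these wrappers finishes the move from bare cmn_err() printing to the mount-aware xfs_warn(mp, ...) family, which can prefix every message with the filesystem it concerns. A hedged userspace sketch of how such a wrapper can be shaped (demo_mount and demo_warn are illustrative names, not the kernel implementation):

#include <stdarg.h>
#include <stdio.h>

struct demo_mount {
	const char	*fsname;	/* device name to prefix messages with */
};

/* printf-style warning that always identifies the filesystem */
static void demo_warn(const struct demo_mount *mp, const char *fmt, ...)
{
	va_list ap;

	fprintf(stderr, "XFS (%s): ", mp ? mp->fsname : "?");
	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
	fputc('\n', stderr);
}

int main(void)
{
	struct demo_mount m = { "sda1" };

	demo_warn(&m, "Log sector size too small (0x%x < 0x%x)", 8, 9);
	return 0;
}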
@@ -148,6 +144,7 @@ static inline uint xlog_get_client_id(__be32 i)
 #define XLOG_RECOVERY_NEEDED	0x4	/* log was recovered */
 #define XLOG_IO_ERROR		0x8	/* log hit an I/O error, and being
					   shutdown */
+#define XLOG_TAIL_WARN		0x10	/* log tail verify warning issued */
 
 #ifdef __KERNEL__
 /*
@@ -574,7 +571,7 @@ int xlog_write(struct log *log, struct xfs_log_vec *log_vector,
  * When we crack an atomic LSN, we sample it first so that the value will not
  * change while we are cracking it into the component values. This means we
  * will always get consistent component values to work from. This should always
- * be used to smaple and crack LSNs taht are stored and updated in atomic
+ * be used to sample and crack LSNs that are stored and updated in atomic
  * variables.
  */
 static inline void
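The corrected comment describes the sample-then-crack idiom: load the 64-bit atomic exactly once into a local, then split the local, so the cycle and block halves can never come from two different updates. A portable C11 sketch (the high/low split here is illustrative, not the on-disk XFS LSN layout):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative 64-bit LSN: high 32 bits = cycle, low 32 bits = block. */
static inline void crack_lsn(_Atomic uint64_t *lsn,
			     uint32_t *cycle, uint32_t *block)
{
	uint64_t val = atomic_load(lsn);	/* sample exactly once */

	*cycle = (uint32_t)(val >> 32);
	*block = (uint32_t)val;			/* both halves from one read */
}

int main(void)
{
	_Atomic uint64_t lsn = ((uint64_t)7 << 32) | 1234;
	uint32_t cycle, block;

	crack_lsn(&lsn, &cycle, &block);
	printf("cycle=%u block=%u\n", cycle, block);	/* cycle=7 block=1234 */
	return 0;
}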
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index aa0ebb776903..5cc464a17c93 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -92,7 +92,7 @@ xlog_get_bp(
	int		nbblks)
 {
 	if (!xlog_buf_bbcount_valid(log, nbblks)) {
-		xlog_warn("XFS: Invalid block length (0x%x) given for buffer",
+		xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
			nbblks);
 		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
 		return NULL;
@@ -101,7 +101,7 @@ xlog_get_bp(
 	/*
	 * We do log I/O in units of log sectors (a power-of-2
	 * multiple of the basic block size), so we round up the
-	 * requested size to acommodate the basic blocks required
+	 * requested size to accommodate the basic blocks required
	 * for complete log sectors.
	 *
	 * In addition, the buffer may be used for a non-sector-
@@ -112,7 +112,7 @@ xlog_get_bp(
	 * an issue. Nor will this be a problem if the log I/O is
	 * done in basic blocks (sector size 1). But otherwise we
	 * extend the buffer by one extra log sector to ensure
-	 * there's space to accomodate this possiblility.
+	 * there's space to accommodate this possibility.
	 */
 	if (nbblks > 1 && log->l_sectBBsize > 1)
 		nbblks += log->l_sectBBsize;
@@ -160,7 +160,7 @@ xlog_bread_noalign(
 	int		error;
 
 	if (!xlog_buf_bbcount_valid(log, nbblks)) {
-		xlog_warn("XFS: Invalid block length (0x%x) given for buffer",
+		xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
			nbblks);
 		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
 		return EFSCORRUPTED;
@@ -219,7 +219,7 @@ xlog_bwrite(
 	int		error;
 
 	if (!xlog_buf_bbcount_valid(log, nbblks)) {
-		xlog_warn("XFS: Invalid block length (0x%x) given for buffer",
+		xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
			nbblks);
 		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
 		return EFSCORRUPTED;
@@ -254,9 +254,9 @@ xlog_header_check_dump(
	xfs_mount_t		*mp,
	xlog_rec_header_t	*head)
 {
-	cmn_err(CE_DEBUG, "%s: SB : uuid = %pU, fmt = %d\n",
+	xfs_debug(mp, "%s: SB : uuid = %pU, fmt = %d\n",
		__func__, &mp->m_sb.sb_uuid, XLOG_FMT);
-	cmn_err(CE_DEBUG, "    log : uuid = %pU, fmt = %d\n",
+	xfs_debug(mp, "    log : uuid = %pU, fmt = %d\n",
		&head->h_fs_uuid, be32_to_cpu(head->h_fmt));
 }
 #else
@@ -279,15 +279,15 @@ xlog_header_check_recover(
	 * a dirty log created in IRIX.
	 */
 	if (unlikely(be32_to_cpu(head->h_fmt) != XLOG_FMT)) {
-		xlog_warn(
-	"XFS: dirty log written in incompatible format - can't recover");
+		xfs_warn(mp,
+	"dirty log written in incompatible format - can't recover");
 		xlog_header_check_dump(mp, head);
 		XFS_ERROR_REPORT("xlog_header_check_recover(1)",
				 XFS_ERRLEVEL_HIGH, mp);
 		return XFS_ERROR(EFSCORRUPTED);
 	} else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
-		xlog_warn(
-	"XFS: dirty log entry has mismatched uuid - can't recover");
+		xfs_warn(mp,
+	"dirty log entry has mismatched uuid - can't recover");
 		xlog_header_check_dump(mp, head);
 		XFS_ERROR_REPORT("xlog_header_check_recover(2)",
				 XFS_ERRLEVEL_HIGH, mp);
@@ -312,9 +312,9 @@ xlog_header_check_mount(
		 * h_fs_uuid is nil, we assume this log was last mounted
		 * by IRIX and continue.
		 */
-		xlog_warn("XFS: nil uuid in log - IRIX style log");
+		xfs_warn(mp, "nil uuid in log - IRIX style log");
 	} else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
-		xlog_warn("XFS: log has mismatched uuid - can't recover");
+		xfs_warn(mp, "log has mismatched uuid - can't recover");
 		xlog_header_check_dump(mp, head);
 		XFS_ERROR_REPORT("xlog_header_check_mount",
				 XFS_ERRLEVEL_HIGH, mp);
@@ -490,8 +490,8 @@ xlog_find_verify_log_record(
 	for (i = (*last_blk) - 1; i >= 0; i--) {
 		if (i < start_blk) {
 			/* valid log record not found */
-			xlog_warn(
-		"XFS: Log inconsistent (didn't find previous header)");
+			xfs_warn(log->l_mp,
+		"Log inconsistent (didn't find previous header)");
 			ASSERT(0);
 			error = XFS_ERROR(EIO);
 			goto out;
@@ -591,12 +591,12 @@ xlog_find_head(
			 * mkfs etc write a dummy unmount record to a fresh
			 * log so we can store the uuid in there
			 */
-			xlog_warn("XFS: totally zeroed log");
+			xfs_warn(log->l_mp, "totally zeroed log");
 		}
 
 		return 0;
 	} else if (error) {
-		xlog_warn("XFS: empty log check failed");
+		xfs_warn(log->l_mp, "empty log check failed");
 		return error;
 	}
 
@@ -819,7 +819,7 @@ validate_head:
 	xlog_put_bp(bp);
 
 	if (error)
-		xlog_warn("XFS: failed to find log head");
+		xfs_warn(log->l_mp, "failed to find log head");
 	return error;
 }
 
@@ -912,7 +912,7 @@ xlog_find_tail(
 		}
 	}
 	if (!found) {
-		xlog_warn("XFS: xlog_find_tail: couldn't find sync record");
+		xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
 		ASSERT(0);
 		return XFS_ERROR(EIO);
 	}
@@ -1028,7 +1028,7 @@ done:
 	xlog_put_bp(bp);
 
 	if (error)
-		xlog_warn("XFS: failed to locate log tail");
+		xfs_warn(log->l_mp, "failed to locate log tail");
 	return error;
 }
 
@@ -1092,7 +1092,8 @@ xlog_find_zeroed(
		 * the first block must be 1. If it's not, maybe we're
		 * not looking at a log... Bail out.
		 */
-		xlog_warn("XFS: Log inconsistent or not a log (last==0, first!=1)");
+		xfs_warn(log->l_mp,
+			"Log inconsistent or not a log (last==0, first!=1)");
 		return XFS_ERROR(EINVAL);
 	}
 
@@ -1506,8 +1507,8 @@ xlog_recover_add_to_trans(
 	if (list_empty(&trans->r_itemq)) {
 		/* we need to catch log corruptions here */
 		if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) {
-			xlog_warn("XFS: xlog_recover_add_to_trans: "
-				  "bad header magic number");
+			xfs_warn(log->l_mp, "%s: bad header magic number",
				__func__);
 			ASSERT(0);
 			return XFS_ERROR(EIO);
 		}
@@ -1534,8 +1535,8 @@ xlog_recover_add_to_trans(
 	if (item->ri_total == 0) {		/* first region to be added */
 		if (in_f->ilf_size == 0 ||
		    in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) {
-			xlog_warn(
-	"XFS: bad number of regions (%d) in inode log format",
+			xfs_warn(log->l_mp,
+		"bad number of regions (%d) in inode log format",
				  in_f->ilf_size);
 			ASSERT(0);
 			return XFS_ERROR(EIO);
@@ -1592,8 +1593,9 @@ xlog_recover_reorder_trans(
 			list_move_tail(&item->ri_list, &trans->r_itemq);
 			break;
 		default:
-			xlog_warn(
-	"XFS: xlog_recover_reorder_trans: unrecognized type of log operation");
+			xfs_warn(log->l_mp,
+				"%s: unrecognized type of log operation",
+				__func__);
 			ASSERT(0);
 			return XFS_ERROR(EIO);
 		}
@@ -1803,8 +1805,9 @@ xlog_recover_do_inode_buffer(
 		logged_nextp = item->ri_buf[item_index].i_addr +
				next_unlinked_offset - reg_buf_offset;
 		if (unlikely(*logged_nextp == 0)) {
-			xfs_fs_cmn_err(CE_ALERT, mp,
-		"bad inode buffer log record (ptr = 0x%p, bp = 0x%p). XFS trying to replay bad (0) inode di_next_unlinked field",
+			xfs_alert(mp,
+		"Bad inode buffer log record (ptr = 0x%p, bp = 0x%p). "
+		"Trying to replay bad (0) inode di_next_unlinked field.",
				item, bp);
 			XFS_ERROR_REPORT("xlog_recover_do_inode_buf",
					 XFS_ERRLEVEL_LOW, mp);
@@ -1863,17 +1866,17 @@ xlog_recover_do_reg_buffer(
 		if (buf_f->blf_flags &
		   (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
 			if (item->ri_buf[i].i_addr == NULL) {
-				cmn_err(CE_ALERT,
+				xfs_alert(mp,
					"XFS: NULL dquot in %s.", __func__);
 				goto next;
 			}
 			if (item->ri_buf[i].i_len < sizeof(xfs_disk_dquot_t)) {
-				cmn_err(CE_ALERT,
+				xfs_alert(mp,
					"XFS: dquot too small (%d) in %s.",
					item->ri_buf[i].i_len, __func__);
 				goto next;
 			}
-			error = xfs_qm_dqcheck(item->ri_buf[i].i_addr,
+			error = xfs_qm_dqcheck(mp, item->ri_buf[i].i_addr,
					       -1, 0, XFS_QMOPT_DOWARN,
					       "dquot_buf_recover");
 			if (error)
@@ -1898,6 +1901,7 @@ xlog_recover_do_reg_buffer(
  */
 int
 xfs_qm_dqcheck(
+	struct xfs_mount *mp,
	xfs_disk_dquot_t *ddq,
	xfs_dqid_t	 id,
	uint		 type,	  /* used only when IO_dorepair is true */
@@ -1924,14 +1928,14 @@ xfs_qm_dqcheck(
	 */
 	if (be16_to_cpu(ddq->d_magic) != XFS_DQUOT_MAGIC) {
 		if (flags & XFS_QMOPT_DOWARN)
-			cmn_err(CE_ALERT,
+			xfs_alert(mp,
			"%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x",
			str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC);
 		errs++;
 	}
 	if (ddq->d_version != XFS_DQUOT_VERSION) {
 		if (flags & XFS_QMOPT_DOWARN)
-			cmn_err(CE_ALERT,
+			xfs_alert(mp,
			"%s : XFS dquot ID 0x%x, version 0x%x != 0x%x",
			str, id, ddq->d_version, XFS_DQUOT_VERSION);
 		errs++;
@@ -1941,7 +1945,7 @@ xfs_qm_dqcheck(
	    ddq->d_flags != XFS_DQ_PROJ &&
	    ddq->d_flags != XFS_DQ_GROUP) {
 		if (flags & XFS_QMOPT_DOWARN)
-			cmn_err(CE_ALERT,
+			xfs_alert(mp,
			"%s : XFS dquot ID 0x%x, unknown flags 0x%x",
			str, id, ddq->d_flags);
 		errs++;
@@ -1949,7 +1953,7 @@ xfs_qm_dqcheck(
 
 	if (id != -1 && id != be32_to_cpu(ddq->d_id)) {
 		if (flags & XFS_QMOPT_DOWARN)
-			cmn_err(CE_ALERT,
+			xfs_alert(mp,
			"%s : ondisk-dquot 0x%p, ID mismatch: "
			"0x%x expected, found id 0x%x",
			str, ddq, id, be32_to_cpu(ddq->d_id));
@@ -1962,9 +1966,8 @@ xfs_qm_dqcheck(
		    be64_to_cpu(ddq->d_blk_softlimit)) {
 			if (!ddq->d_btimer) {
 				if (flags & XFS_QMOPT_DOWARN)
-					cmn_err(CE_ALERT,
-					"%s : Dquot ID 0x%x (0x%p) "
-					"BLK TIMER NOT STARTED",
+					xfs_alert(mp,
+			"%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED",
					str, (int)be32_to_cpu(ddq->d_id), ddq);
 				errs++;
 			}
@@ -1974,9 +1977,8 @@ xfs_qm_dqcheck(
		    be64_to_cpu(ddq->d_ino_softlimit)) {
 			if (!ddq->d_itimer) {
 				if (flags & XFS_QMOPT_DOWARN)
-					cmn_err(CE_ALERT,
-					"%s : Dquot ID 0x%x (0x%p) "
-					"INODE TIMER NOT STARTED",
+					xfs_alert(mp,
+			"%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED",
					str, (int)be32_to_cpu(ddq->d_id), ddq);
 				errs++;
 			}
@@ -1986,9 +1988,8 @@ xfs_qm_dqcheck(
		    be64_to_cpu(ddq->d_rtb_softlimit)) {
 			if (!ddq->d_rtbtimer) {
 				if (flags & XFS_QMOPT_DOWARN)
-					cmn_err(CE_ALERT,
-					"%s : Dquot ID 0x%x (0x%p) "
-					"RTBLK TIMER NOT STARTED",
+					xfs_alert(mp,
+			"%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED",
					str, (int)be32_to_cpu(ddq->d_id), ddq);
 				errs++;
 			}
@@ -1999,7 +2000,7 @@ xfs_qm_dqcheck(
 		return errs;
 
 	if (flags & XFS_QMOPT_DOWARN)
-		cmn_err(CE_NOTE, "Re-initializing dquot ID 0x%x", id);
+		xfs_notice(mp, "Re-initializing dquot ID 0x%x", id);
 
 	/*
	 * Typically, a repair is only requested by quotacheck.
@@ -2218,9 +2219,9 @@ xlog_recover_inode_pass2(
	 */
 	if (unlikely(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)) {
 		xfs_buf_relse(bp);
-		xfs_fs_cmn_err(CE_ALERT, mp,
-	"xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld",
-			dip, bp, in_f->ilf_ino);
+		xfs_alert(mp,
+	"%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld",
+			__func__, dip, bp, in_f->ilf_ino);
 		XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)",
				 XFS_ERRLEVEL_LOW, mp);
 		error = EFSCORRUPTED;
@@ -2229,9 +2230,9 @@ xlog_recover_inode_pass2(
 	dicp = item->ri_buf[1].i_addr;
 	if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) {
 		xfs_buf_relse(bp);
-		xfs_fs_cmn_err(CE_ALERT, mp,
-			"xfs_inode_recover: Bad inode log record, rec ptr 0x%p, ino %Ld",
-			item, in_f->ilf_ino);
+		xfs_alert(mp,
+			"%s: Bad inode log record, rec ptr 0x%p, ino %Ld",
+			__func__, item, in_f->ilf_ino);
 		XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)",
				 XFS_ERRLEVEL_LOW, mp);
 		error = EFSCORRUPTED;
@@ -2263,9 +2264,10 @@ xlog_recover_inode_pass2(
 			XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
					 XFS_ERRLEVEL_LOW, mp, dicp);
 			xfs_buf_relse(bp);
-			xfs_fs_cmn_err(CE_ALERT, mp,
-				"xfs_inode_recover: Bad regular inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
-				item, dip, bp, in_f->ilf_ino);
+			xfs_alert(mp,
+		"%s: Bad regular inode log record, rec ptr 0x%p, "
+		"ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
+				__func__, item, dip, bp, in_f->ilf_ino);
 			error = EFSCORRUPTED;
 			goto error;
 		}
@@ -2276,9 +2278,10 @@ xlog_recover_inode_pass2(
 			XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
					 XFS_ERRLEVEL_LOW, mp, dicp);
 			xfs_buf_relse(bp);
-			xfs_fs_cmn_err(CE_ALERT, mp,
-				"xfs_inode_recover: Bad dir inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
-				item, dip, bp, in_f->ilf_ino);
+			xfs_alert(mp,
+		"%s: Bad dir inode log record, rec ptr 0x%p, "
+		"ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
+				__func__, item, dip, bp, in_f->ilf_ino);
 			error = EFSCORRUPTED;
 			goto error;
 		}
@@ -2287,9 +2290,10 @@ xlog_recover_inode_pass2(
 		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
				 XFS_ERRLEVEL_LOW, mp, dicp);
 		xfs_buf_relse(bp);
-		xfs_fs_cmn_err(CE_ALERT, mp,
-	"xfs_inode_recover: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld",
-			item, dip, bp, in_f->ilf_ino,
+		xfs_alert(mp,
+	"%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
+	"dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld",
+			__func__, item, dip, bp, in_f->ilf_ino,
			dicp->di_nextents + dicp->di_anextents,
			dicp->di_nblocks);
 		error = EFSCORRUPTED;
@@ -2299,8 +2303,9 @@ xlog_recover_inode_pass2(
 		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
				 XFS_ERRLEVEL_LOW, mp, dicp);
 		xfs_buf_relse(bp);
-		xfs_fs_cmn_err(CE_ALERT, mp,
-	"xfs_inode_recover: Bad inode log rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, forkoff 0x%x",
+		xfs_alert(mp,
+	"%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
+	"dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__,
			item, dip, bp, in_f->ilf_ino, dicp->di_forkoff);
 		error = EFSCORRUPTED;
 		goto error;
@@ -2309,9 +2314,9 @@ xlog_recover_inode_pass2(
 		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
				 XFS_ERRLEVEL_LOW, mp, dicp);
 		xfs_buf_relse(bp);
-		xfs_fs_cmn_err(CE_ALERT, mp,
-	"xfs_inode_recover: Bad inode log record length %d, rec ptr 0x%p",
-			item->ri_buf[1].i_len, item);
+		xfs_alert(mp,
+			"%s: Bad inode log record length %d, rec ptr 0x%p",
+			__func__, item->ri_buf[1].i_len, item);
 		error = EFSCORRUPTED;
 		goto error;
 	}
@@ -2398,7 +2403,7 @@ xlog_recover_inode_pass2(
 		break;
 
 	default:
-		xlog_warn("XFS: xlog_recover_inode_pass2: Invalid flag");
+		xfs_warn(log->l_mp, "%s: Invalid flag", __func__);
 		ASSERT(0);
 		xfs_buf_relse(bp);
 		error = EIO;
@@ -2467,13 +2472,11 @@ xlog_recover_dquot_pass2(
 
 	recddq = item->ri_buf[1].i_addr;
 	if (recddq == NULL) {
-		cmn_err(CE_ALERT,
-			"XFS: NULL dquot in %s.", __func__);
+		xfs_alert(log->l_mp, "NULL dquot in %s.", __func__);
 		return XFS_ERROR(EIO);
 	}
 	if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) {
-		cmn_err(CE_ALERT,
-			"XFS: dquot too small (%d) in %s.",
+		xfs_alert(log->l_mp, "dquot too small (%d) in %s.",
			item->ri_buf[1].i_len, __func__);
 		return XFS_ERROR(EIO);
 	}
@@ -2498,12 +2501,10 @@ xlog_recover_dquot_pass2(
	 */
 	dq_f = item->ri_buf[0].i_addr;
 	ASSERT(dq_f);
-	if ((error = xfs_qm_dqcheck(recddq,
-			   dq_f->qlf_id,
-			   0, XFS_QMOPT_DOWARN,
-			   "xlog_recover_dquot_pass2 (log copy)"))) {
+	error = xfs_qm_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
+			   "xlog_recover_dquot_pass2 (log copy)");
+	if (error)
 		return XFS_ERROR(EIO);
-	}
 	ASSERT(dq_f->qlf_len == 1);
 
 	error = xfs_read_buf(mp, mp->m_ddev_targp,
@@ -2523,8 +2524,9 @@ xlog_recover_dquot_pass2(
	 * was among a chunk of dquots created earlier, and we did some
	 * minimal initialization then.
	 */
-	if (xfs_qm_dqcheck(ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
-			   "xlog_recover_dquot_pass2")) {
+	error = xfs_qm_dqcheck(mp, ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
+			   "xlog_recover_dquot_pass2");
+	if (error) {
 		xfs_buf_relse(bp);
 		return XFS_ERROR(EIO);
 	}
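Besides threading mp through for device-aware messages, the last two hunks unpick assignments buried inside if conditions into the assign-then-test form preferred by kernel style, keeping the condition free of side effects. A small illustration (the check() helper is hypothetical):

#include <errno.h>
#include <stdio.h>

/* hypothetical validator: returns 0 on success, -EINVAL otherwise */
static int check(int v)
{
	return v >= 0 ? 0 : -EINVAL;
}

int main(void)
{
	int error;

	/* discouraged: assignment hidden inside the condition */
	if ((error = check(-1)))
		printf("old style: error %d\n", error);

	/* preferred: assign first, then test the result */
	error = check(-1);
	if (error)
		printf("new style: error %d\n", error);
	return 0;
}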
@@ -2676,9 +2678,8 @@ xlog_recover_commit_pass1(
 		/* nothing to do in pass 1 */
 		return 0;
 	default:
-		xlog_warn(
-	"XFS: invalid item type (%d) xlog_recover_commit_pass1",
-			ITEM_TYPE(item));
+		xfs_warn(log->l_mp, "%s: invalid item type (%d)",
+			__func__, ITEM_TYPE(item));
 		ASSERT(0);
 		return XFS_ERROR(EIO);
 	}
@@ -2707,9 +2708,8 @@ xlog_recover_commit_pass2(
 		/* nothing to do in pass2 */
 		return 0;
 	default:
-		xlog_warn(
-	"XFS: invalid item type (%d) xlog_recover_commit_pass2",
-			ITEM_TYPE(item));
+		xfs_warn(log->l_mp, "%s: invalid item type (%d)",
+			__func__, ITEM_TYPE(item));
 		ASSERT(0);
 		return XFS_ERROR(EIO);
 	}
@@ -2751,10 +2751,11 @@ xlog_recover_commit_trans(
 
 STATIC int
 xlog_recover_unmount_trans(
+	struct log		*log,
	xlog_recover_t		*trans)
 {
 	/* Do nothing now */
-	xlog_warn("XFS: xlog_recover_unmount_trans: Unmount LR");
+	xfs_warn(log->l_mp, "%s: Unmount LR", __func__);
 	return 0;
 }
 
@@ -2797,8 +2798,8 @@ xlog_recover_process_data(
 		dp += sizeof(xlog_op_header_t);
 		if (ohead->oh_clientid != XFS_TRANSACTION &&
		    ohead->oh_clientid != XFS_LOG) {
-			xlog_warn(
-		"XFS: xlog_recover_process_data: bad clientid");
+			xfs_warn(log->l_mp, "%s: bad clientid 0x%x",
					__func__, ohead->oh_clientid);
 			ASSERT(0);
 			return (XFS_ERROR(EIO));
 		}
@@ -2811,8 +2812,8 @@ xlog_recover_process_data(
					be64_to_cpu(rhead->h_lsn));
 		} else {
 			if (dp + be32_to_cpu(ohead->oh_len) > lp) {
-				xlog_warn(
-			"XFS: xlog_recover_process_data: bad length");
+				xfs_warn(log->l_mp, "%s: bad length 0x%x",
					__func__, be32_to_cpu(ohead->oh_len));
 				WARN_ON(1);
 				return (XFS_ERROR(EIO));
 			}
@@ -2825,7 +2826,7 @@ xlog_recover_process_data(
						 trans, pass);
 				break;
 			case XLOG_UNMOUNT_TRANS:
-				error = xlog_recover_unmount_trans(trans);
+				error = xlog_recover_unmount_trans(log, trans);
 				break;
 			case XLOG_WAS_CONT_TRANS:
 				error = xlog_recover_add_to_cont_trans(log,
@@ -2833,8 +2834,8 @@ xlog_recover_process_data(
						be32_to_cpu(ohead->oh_len));
 				break;
 			case XLOG_START_TRANS:
-				xlog_warn(
-			"XFS: xlog_recover_process_data: bad transaction");
+				xfs_warn(log->l_mp, "%s: bad transaction",
					__func__);
 				ASSERT(0);
 				error = XFS_ERROR(EIO);
 				break;
@@ -2844,8 +2845,8 @@ xlog_recover_process_data(
						dp, be32_to_cpu(ohead->oh_len));
 				break;
 			default:
-				xlog_warn(
-			"XFS: xlog_recover_process_data: bad flag");
+				xfs_warn(log->l_mp, "%s: bad flag 0x%x",
					__func__, flags);
 				ASSERT(0);
 				error = XFS_ERROR(EIO);
 				break;
@@ -3030,8 +3031,7 @@ xlog_recover_clear_agi_bucket(
 out_abort:
 	xfs_trans_cancel(tp, XFS_TRANS_ABORT);
 out_error:
-	xfs_fs_cmn_err(CE_WARN, mp, "xlog_recover_clear_agi_bucket: "
-			"failed to clear agi %d. Continuing.", agno);
+	xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, agno);
 	return;
 }
 
@@ -3282,7 +3282,7 @@ xlog_valid_rec_header(
 	if (unlikely(
	    (!rhead->h_version ||
	    (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) {
-		xlog_warn("XFS: %s: unrecognised log version (%d).",
+		xfs_warn(log->l_mp, "%s: unrecognised log version (%d).",
			__func__, be32_to_cpu(rhead->h_version));
 		return XFS_ERROR(EIO);
 	}
@@ -3740,10 +3740,9 @@ xlog_recover(
 			return error;
 		}
 
-		cmn_err(CE_NOTE,
-			"Starting XFS recovery on filesystem: %s (logdev: %s)",
-			log->l_mp->m_fsname, log->l_mp->m_logname ?
-			log->l_mp->m_logname : "internal");
+		xfs_notice(log->l_mp, "Starting recovery (logdev: %s)",
				log->l_mp->m_logname ? log->l_mp->m_logname
						     : "internal");
 
 		error = xlog_do_recover(log, head_blk, tail_blk);
 		log->l_flags |= XLOG_RECOVERY_NEEDED;
@@ -3776,9 +3775,7 @@ xlog_recover_finish(
 		int	error;
 		error = xlog_recover_process_efis(log);
 		if (error) {
-			cmn_err(CE_ALERT,
-				"Failed to recover EFIs on filesystem: %s",
-				log->l_mp->m_fsname);
+			xfs_alert(log->l_mp, "Failed to recover EFIs");
 			return error;
 		}
 		/*
@@ -3793,15 +3790,12 @@ xlog_recover_finish(
 
 		xlog_recover_check_summary(log);
 
-		cmn_err(CE_NOTE,
-			"Ending XFS recovery on filesystem: %s (logdev: %s)",
-			log->l_mp->m_fsname, log->l_mp->m_logname ?
-			log->l_mp->m_logname : "internal");
+		xfs_notice(log->l_mp, "Ending recovery (logdev: %s)",
				log->l_mp->m_logname ? log->l_mp->m_logname
						     : "internal");
 		log->l_flags &= ~XLOG_RECOVERY_NEEDED;
 	} else {
-		cmn_err(CE_DEBUG,
-			"Ending clean XFS mount for filesystem: %s\n",
-			log->l_mp->m_fsname);
+		xfs_info(log->l_mp, "Ending clean mount");
 	}
 	return 0;
 }
@@ -3834,10 +3828,8 @@ xlog_recover_check_summary(
 	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
 		error = xfs_read_agf(mp, NULL, agno, 0, &agfbp);
 		if (error) {
-			xfs_fs_cmn_err(CE_ALERT, mp,
-					"xlog_recover_check_summary(agf)"
-					"agf read failed agno %d error %d",
-							agno, error);
+			xfs_alert(mp, "%s agf read failed agno %d error %d",
						__func__, agno, error);
 		} else {
 			agfp = XFS_BUF_TO_AGF(agfbp);
 			freeblks += be32_to_cpu(agfp->agf_freeblks) +
@@ -3846,7 +3838,10 @@ xlog_recover_check_summary(
 		}
 
 		error = xfs_read_agi(mp, NULL, agno, &agibp);
-		if (!error) {
+		if (error) {
+			xfs_alert(mp, "%s agi read failed agno %d error %d",
						__func__, agno, error);
+		} else {
 			struct xfs_agi	*agi = XFS_BUF_TO_AGI(agibp);
 
 			itotal += be32_to_cpu(agi->agi_count);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index d447aef84bc3..bb3f9a7b24ed 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -133,9 +133,7 @@ xfs_uuid_mount(
 		return 0;
 
 	if (uuid_is_nil(uuid)) {
-		cmn_err(CE_WARN,
-			"XFS: Filesystem %s has nil UUID - can't mount",
-			mp->m_fsname);
+		xfs_warn(mp, "Filesystem has nil UUID - can't mount");
 		return XFS_ERROR(EINVAL);
 	}
 
@@ -163,8 +161,7 @@ xfs_uuid_mount(
 
 out_duplicate:
 	mutex_unlock(&xfs_uuid_table_mutex);
-	cmn_err(CE_WARN, "XFS: Filesystem %s has duplicate UUID - can't mount",
-			 mp->m_fsname);
+	xfs_warn(mp, "Filesystem has duplicate UUID - can't mount");
 	return XFS_ERROR(EINVAL);
 }
 
@@ -311,6 +308,8 @@ xfs_mount_validate_sb(
	xfs_sb_t	*sbp,
	int		flags)
 {
+	int		loud = !(flags & XFS_MFSI_QUIET);
+
 	/*
	 * If the log device and data device have the
	 * same device number, the log is internal.
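The new loud flag gates every diagnostic in the validator on whether the caller asked for a quiet probe (XFS_MFSI_QUIET), replacing the old xfs_fs_mount_cmn_err() indirection with plain xfs_warn() calls. A self-contained sketch of the gating pattern (MFSI_QUIET_DEMO and validate_demo are illustrative names):

#include <stdio.h>

#define MFSI_QUIET_DEMO	0x1		/* caller is probing; stay silent */
#define DEMO_MAGIC	0x58465342	/* "XFSB" */

/* Validate a magic value, warning only when the caller wants noise. */
static int validate_demo(unsigned int magic, int flags)
{
	int loud = !(flags & MFSI_QUIET_DEMO);

	if (magic != DEMO_MAGIC) {
		if (loud)
			fprintf(stderr, "bad magic number\n");
		return -1;
	}
	return 0;
}

int main(void)
{
	validate_demo(0, MFSI_QUIET_DEMO);	/* silent failure */
	validate_demo(0, 0);			/* prints "bad magic number" */
	return 0;
}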
@@ -319,28 +318,32 @@ xfs_mount_validate_sb(
	 * a volume filesystem in a non-volume manner.
	 */
 	if (sbp->sb_magicnum != XFS_SB_MAGIC) {
-		xfs_fs_mount_cmn_err(flags, "bad magic number");
+		if (loud)
+			xfs_warn(mp, "bad magic number");
 		return XFS_ERROR(EWRONGFS);
 	}
 
 	if (!xfs_sb_good_version(sbp)) {
-		xfs_fs_mount_cmn_err(flags, "bad version");
+		if (loud)
+			xfs_warn(mp, "bad version");
 		return XFS_ERROR(EWRONGFS);
 	}
 
 	if (unlikely(
	    sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
-		xfs_fs_mount_cmn_err(flags,
-			"filesystem is marked as having an external log; "
-			"specify logdev on the\nmount command line.");
+		if (loud)
+			xfs_warn(mp,
+		"filesystem is marked as having an external log; "
+		"specify logdev on the mount command line.");
 		return XFS_ERROR(EINVAL);
 	}
 
 	if (unlikely(
	    sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) {
-		xfs_fs_mount_cmn_err(flags,
-			"filesystem is marked as having an internal log; "
-			"do not specify logdev on\nthe mount command line.");
+		if (loud)
+			xfs_warn(mp,
+		"filesystem is marked as having an internal log; "
+		"do not specify logdev on the mount command line.");
 		return XFS_ERROR(EINVAL);
 	}
 
@@ -369,7 +372,8 @@ xfs_mount_validate_sb(
	    (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) ||
	    (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) ||
	    (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) {
-		xfs_fs_mount_cmn_err(flags, "SB sanity check 1 failed");
+		if (loud)
+			xfs_warn(mp, "SB sanity check 1 failed");
 		return XFS_ERROR(EFSCORRUPTED);
 	}
 
@@ -382,7 +386,8 @@ xfs_mount_validate_sb(
	    (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks ||
	    sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) *
			      sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) {
-		xfs_fs_mount_cmn_err(flags, "SB sanity check 2 failed");
+		if (loud)
+			xfs_warn(mp, "SB sanity check 2 failed");
 		return XFS_ERROR(EFSCORRUPTED);
 	}
 
@@ -390,12 +395,12 @@ xfs_mount_validate_sb(
	 * Until this is fixed only page-sized or smaller data blocks work.
	 */
 	if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
-		xfs_fs_mount_cmn_err(flags,
-			"file system with blocksize %d bytes",
-			sbp->sb_blocksize);
-		xfs_fs_mount_cmn_err(flags,
-			"only pagesize (%ld) or less will currently work.",
-			PAGE_SIZE);
+		if (loud) {
+			xfs_warn(mp,
+		"File system with blocksize %d bytes. "
+		"Only pagesize (%ld) or less will currently work.",
+				sbp->sb_blocksize, PAGE_SIZE);
+		}
 		return XFS_ERROR(ENOSYS);
 	}
 
@@ -409,21 +414,23 @@ xfs_mount_validate_sb(
 	case 2048:
 		break;
 	default:
-		xfs_fs_mount_cmn_err(flags,
-			"inode size of %d bytes not supported",
-			sbp->sb_inodesize);
+		if (loud)
+			xfs_warn(mp, "inode size of %d bytes not supported",
				sbp->sb_inodesize);
 		return XFS_ERROR(ENOSYS);
 	}
 
 	if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) ||
	    xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
-		xfs_fs_mount_cmn_err(flags,
-			"file system too large to be mounted on this system.");
+		if (loud)
+			xfs_warn(mp,
+		"file system too large to be mounted on this system.");
 		return XFS_ERROR(EFBIG);
 	}
 
 	if (unlikely(sbp->sb_inprogress)) {
-		xfs_fs_mount_cmn_err(flags, "file system busy");
+		if (loud)
+			xfs_warn(mp, "file system busy");
 		return XFS_ERROR(EFSCORRUPTED);
 	}
 
@@ -431,8 +438,9 @@ xfs_mount_validate_sb(
	 * Version 1 directory format has never worked on Linux.
	 */
 	if (unlikely(!xfs_sb_version_hasdirv2(sbp))) {
-		xfs_fs_mount_cmn_err(flags,
-			"file system using version 1 directory format");
+		if (loud)
+			xfs_warn(mp,
+			"file system using version 1 directory format");
 		return XFS_ERROR(ENOSYS);
 	}
 
@@ -673,6 +681,7 @@ xfs_readsb(xfs_mount_t *mp, int flags)
 	unsigned int	sector_size;
 	xfs_buf_t	*bp;
 	int		error;
+	int		loud = !(flags & XFS_MFSI_QUIET);
 
 	ASSERT(mp->m_sb_bp == NULL);
 	ASSERT(mp->m_ddev_targp != NULL);
@@ -688,7 +697,8 @@ reread:
 	bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp,
					XFS_SB_DADDR, sector_size, 0);
 	if (!bp) {
-		xfs_fs_mount_cmn_err(flags, "SB buffer read failed");
+		if (loud)
+			xfs_warn(mp, "SB buffer read failed");
 		return EIO;
 	}
 
@@ -699,7 +709,8 @@ reread:
 	xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp));
 	error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags);
 	if (error) {
-		xfs_fs_mount_cmn_err(flags, "SB validate failed");
+		if (loud)
+			xfs_warn(mp, "SB validate failed");
 		goto release_buf;
 	}
 
@@ -707,9 +718,9 @@ reread:
	 * We must be able to do sector-sized and sector-aligned IO.
	 */
 	if (sector_size > mp->m_sb.sb_sectsize) {
-		xfs_fs_mount_cmn_err(flags,
-			"device supports only %u byte sectors (not %u)",
-			sector_size, mp->m_sb.sb_sectsize);
+		if (loud)
+			xfs_warn(mp, "device supports %u byte sectors (not %u)",
				sector_size, mp->m_sb.sb_sectsize);
 		error = ENOSYS;
 		goto release_buf;
 	}
@@ -853,8 +864,7 @@ xfs_update_alignment(xfs_mount_t *mp)
 		if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
		    (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
 			if (mp->m_flags & XFS_MOUNT_RETERR) {
-				cmn_err(CE_WARN,
-					"XFS: alignment check 1 failed");
+				xfs_warn(mp, "alignment check 1 failed");
 				return XFS_ERROR(EINVAL);
 			}
 			mp->m_dalign = mp->m_swidth = 0;
@@ -867,8 +877,9 @@ xfs_update_alignment(xfs_mount_t *mp)
 				if (mp->m_flags & XFS_MOUNT_RETERR) {
 					return XFS_ERROR(EINVAL);
 				}
-				xfs_fs_cmn_err(CE_WARN, mp,
-"stripe alignment turned off: sunit(%d)/swidth(%d) incompatible with agsize(%d)",
+				xfs_warn(mp,
+		"stripe alignment turned off: sunit(%d)/swidth(%d) "
+		"incompatible with agsize(%d)",
					mp->m_dalign, mp->m_swidth,
					sbp->sb_agblocks);
 
@@ -878,9 +889,9 @@ xfs_update_alignment(xfs_mount_t *mp)
 				mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
 			} else {
 				if (mp->m_flags & XFS_MOUNT_RETERR) {
-					xfs_fs_cmn_err(CE_WARN, mp,
+					xfs_warn(mp,
 		"stripe alignment turned off: sunit(%d) less than bsize(%d)",
						mp->m_dalign,
						mp->m_blockmask +1);
 					return XFS_ERROR(EINVAL);
 				}
@@ -1026,14 +1037,14 @@ xfs_check_sizes(xfs_mount_t *mp)
 
 	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
 	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
-		cmn_err(CE_WARN, "XFS: filesystem size mismatch detected");
+		xfs_warn(mp, "filesystem size mismatch detected");
 		return XFS_ERROR(EFBIG);
 	}
 	bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp,
					d - XFS_FSS_TO_BB(mp, 1),
					BBTOB(XFS_FSS_TO_BB(mp, 1)), 0);
 	if (!bp) {
-		cmn_err(CE_WARN, "XFS: last sector read failed");
+		xfs_warn(mp, "last sector read failed");
 		return EIO;
 	}
 	xfs_buf_relse(bp);
@@ -1041,14 +1052,14 @@ xfs_check_sizes(xfs_mount_t *mp)
 	if (mp->m_logdev_targp != mp->m_ddev_targp) {
 		d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
 		if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
-			cmn_err(CE_WARN, "XFS: log size mismatch detected");
+			xfs_warn(mp, "log size mismatch detected");
 			return XFS_ERROR(EFBIG);
 		}
 		bp = xfs_buf_read_uncached(mp, mp->m_logdev_targp,
					d - XFS_FSB_TO_BB(mp, 1),
					XFS_FSB_TO_B(mp, 1), 0);
 		if (!bp) {
-			cmn_err(CE_WARN, "XFS: log device read failed");
+			xfs_warn(mp, "log device read failed");
 			return EIO;
 		}
 		xfs_buf_relse(bp);
@@ -1086,7 +1097,7 @@ xfs_mount_reset_sbqflags(
 		return 0;
 
 #ifdef QUOTADEBUG
-	xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes");
+	xfs_notice(mp, "Writing superblock quota changes");
 #endif
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
@@ -1094,8 +1105,7 @@ xfs_mount_reset_sbqflags(
				      XFS_DEFAULT_LOG_COUNT);
 	if (error) {
 		xfs_trans_cancel(tp, 0);
-		xfs_fs_cmn_err(CE_ALERT, mp,
-			"xfs_mount_reset_sbqflags: Superblock update failed!");
+		xfs_alert(mp, "%s: Superblock update failed!", __func__);
 		return error;
 	}
 
@@ -1161,8 +1171,7 @@ xfs_mountfs(
	 * transaction subsystem is online.
	 */
 	if (xfs_sb_has_mismatched_features2(sbp)) {
-		cmn_err(CE_WARN,
-			"XFS: correcting sb_features alignment problem");
+		xfs_warn(mp, "correcting sb_features alignment problem");
 		sbp->sb_features2 |= sbp->sb_bad_features2;
 		sbp->sb_bad_features2 = sbp->sb_features2;
 		mp->m_update_flags |= XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2;
@@ -1241,7 +1250,7 @@ xfs_mountfs(
	 */
 	error = xfs_rtmount_init(mp);
 	if (error) {
-		cmn_err(CE_WARN, "XFS: RT mount failed");
+		xfs_warn(mp, "RT mount failed");
 		goto out_remove_uuid;
 	}
 
@@ -1272,12 +1281,12 @@ xfs_mountfs(
 	INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
 	error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
 	if (error) {
-		cmn_err(CE_WARN, "XFS: Failed per-ag init: %d", error);
+		xfs_warn(mp, "Failed per-ag init: %d", error);
 		goto out_remove_uuid;
 	}
 
 	if (!sbp->sb_logblocks) {
-		cmn_err(CE_WARN, "XFS: no log defined");
+		xfs_warn(mp, "no log defined");
 		XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp);
 		error = XFS_ERROR(EFSCORRUPTED);
 		goto out_free_perag;
@@ -1290,7 +1299,7 @@ xfs_mountfs(
			      XFS_FSB_TO_DADDR(mp, sbp->sb_logstart),
			      XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
 	if (error) {
-		cmn_err(CE_WARN, "XFS: log mount failed");
+		xfs_warn(mp, "log mount failed");
 		goto out_free_perag;
 	}
 
@@ -1327,16 +1336,14 @@ xfs_mountfs(
	 */
 	error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip);
 	if (error) {
-		cmn_err(CE_WARN, "XFS: failed to read root inode");
+		xfs_warn(mp, "failed to read root inode");
 		goto out_log_dealloc;
 	}
 
 	ASSERT(rip != NULL);
 
 	if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) {
-		cmn_err(CE_WARN, "XFS: corrupted root inode");
-		cmn_err(CE_WARN, "Device %s - root %llu is not a directory",
-			XFS_BUFTARG_NAME(mp->m_ddev_targp),
-			(unsigned long long)rip->i_ino);
+		xfs_warn(mp, "corrupted root inode %llu: not a directory",
			(unsigned long long)rip->i_ino);
 		xfs_iunlock(rip, XFS_ILOCK_EXCL);
 		XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW,
@@ -1356,7 +1363,7 @@ xfs_mountfs(
 		/*
		 * Free up the root inode.
		 */
-		cmn_err(CE_WARN, "XFS: failed to read RT inodes");
+		xfs_warn(mp, "failed to read RT inodes");
 		goto out_rele_rip;
 	}
 
@@ -1368,7 +1375,7 @@ xfs_mountfs(
 	if (mp->m_update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY)) {
 		error = xfs_mount_log_sb(mp, mp->m_update_flags);
 		if (error) {
-			cmn_err(CE_WARN, "XFS: failed to write sb changes");
+			xfs_warn(mp, "failed to write sb changes");
 			goto out_rtunmount;
 		}
 	}
@@ -1389,10 +1396,7 @@ xfs_mountfs(
	 * quotachecked license.
	 */
 	if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) {
-		cmn_err(CE_NOTE,
-			"XFS: resetting qflags for filesystem %s",
-			mp->m_fsname);
-
+		xfs_notice(mp, "resetting quota flags");
 		error = xfs_mount_reset_sbqflags(mp);
1397 if (error) 1401 if (error)
1398 return error; 1402 return error;
@@ -1406,7 +1410,7 @@ xfs_mountfs(
1406 */ 1410 */
1407 error = xfs_log_mount_finish(mp); 1411 error = xfs_log_mount_finish(mp);
1408 if (error) { 1412 if (error) {
1409 cmn_err(CE_WARN, "XFS: log mount finish failed"); 1413 xfs_warn(mp, "log mount finish failed");
1410 goto out_rtunmount; 1414 goto out_rtunmount;
1411 } 1415 }
1412 1416
@@ -1435,8 +1439,8 @@ xfs_mountfs(
1435 resblks = xfs_default_resblks(mp); 1439 resblks = xfs_default_resblks(mp);
1436 error = xfs_reserve_blocks(mp, &resblks, NULL); 1440 error = xfs_reserve_blocks(mp, &resblks, NULL);
1437 if (error) 1441 if (error)
1438 cmn_err(CE_WARN, "XFS: Unable to allocate reserve " 1442 xfs_warn(mp,
1439 "blocks. Continuing without a reserve pool."); 1443 "Unable to allocate reserve blocks. Continuing without reserve pool.");
1440 } 1444 }
1441 1445
1442 return 0; 1446 return 0;
@@ -1525,12 +1529,12 @@ xfs_unmountfs(
1525 resblks = 0; 1529 resblks = 0;
1526 error = xfs_reserve_blocks(mp, &resblks, NULL); 1530 error = xfs_reserve_blocks(mp, &resblks, NULL);
1527 if (error) 1531 if (error)
1528 cmn_err(CE_WARN, "XFS: Unable to free reserved block pool. " 1532 xfs_warn(mp, "Unable to free reserved block pool. "
1529 "Freespace may not be correct on next mount."); 1533 "Freespace may not be correct on next mount.");
1530 1534
1531 error = xfs_log_sbcount(mp, 1); 1535 error = xfs_log_sbcount(mp, 1);
1532 if (error) 1536 if (error)
1533 cmn_err(CE_WARN, "XFS: Unable to update superblock counters. " 1537 xfs_warn(mp, "Unable to update superblock counters. "
1534 "Freespace may not be correct on next mount."); 1538 "Freespace may not be correct on next mount.");
1535 xfs_unmountfs_writesb(mp); 1539 xfs_unmountfs_writesb(mp);
1536 xfs_unmountfs_wait(mp); /* wait for async bufs */ 1540 xfs_unmountfs_wait(mp); /* wait for async bufs */
@@ -2013,10 +2017,8 @@ xfs_dev_is_read_only(
2013 if (xfs_readonly_buftarg(mp->m_ddev_targp) || 2017 if (xfs_readonly_buftarg(mp->m_ddev_targp) ||
2014 xfs_readonly_buftarg(mp->m_logdev_targp) || 2018 xfs_readonly_buftarg(mp->m_logdev_targp) ||
2015 (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) { 2019 (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
2016 cmn_err(CE_NOTE, 2020 xfs_notice(mp, "%s required on read-only device.", message);
2017 "XFS: %s required on read-only device.", message); 2021 xfs_notice(mp, "write access unavailable, cannot proceed.");
2018 cmn_err(CE_NOTE,
2019 "XFS: write access unavailable, cannot proceed.");
2020 return EROFS; 2022 return EROFS;
2021 } 2023 }
2022 return 0; 2024 return 0;
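The conversions above are mechanical: cmn_err(CE_WARN, "XFS: ...") and xfs_fs_cmn_err(..., mp, ...) call sites become xfs_warn(mp, ...), xfs_notice(mp, ...) or xfs_alert(mp, ...), which take the mount first so the filesystem name is prefixed in one place rather than pasted into every format string. A minimal userspace sketch of the idea (the real helpers live in xfs_message.[ch], outside this section; the struct and function here are stand-ins, not kernel code):

#include <stdarg.h>
#include <stdio.h>

/* Stand-in for struct xfs_mount; only the name matters for this sketch. */
struct xfs_mount { const char *m_fsname; };

/* Model of xfs_warn(): one helper prefixes the fs name for all call sites. */
static void xfs_warn_sketch(const struct xfs_mount *mp, const char *fmt, ...)
{
	va_list args;

	fprintf(stderr, "XFS (%s): ", mp && mp->m_fsname ? mp->m_fsname : "?");
	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);
	fputc('\n', stderr);
}

int main(void)
{
	struct xfs_mount m = { "sda1" };

	/* Call sites drop both the "XFS: " prefix and the trailing m_fsname. */
	xfs_warn_sketch(&m, "filesystem size mismatch detected");
	return 0;
}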
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index a62e8971539d..19af0ab0d0c6 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -203,12 +203,9 @@ typedef struct xfs_mount {
203 struct mutex m_icsb_mutex; /* balancer sync lock */ 203 struct mutex m_icsb_mutex; /* balancer sync lock */
204#endif 204#endif
205 struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ 205 struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
206 struct task_struct *m_sync_task; /* generalised sync thread */ 206 struct delayed_work m_sync_work; /* background sync work */
207 xfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */ 207 struct delayed_work m_reclaim_work; /* background inode reclaim */
208 struct list_head m_sync_list; /* sync thread work item list */ 208 struct work_struct m_flush_work; /* background inode flush */
209 spinlock_t m_sync_lock; /* work item list lock */
210 int m_sync_seq; /* sync thread generation no. */
211 wait_queue_head_t m_wait_single_sync_task;
212 __int64_t m_update_flags; /* sb flags we need to update 209 __int64_t m_update_flags; /* sb flags we need to update
213 on the next remount,rw */ 210 on the next remount,rw */
214 struct shrinker m_inode_shrink; /* inode reclaim shrinker */ 211 struct shrinker m_inode_shrink; /* inode reclaim shrinker */
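The three new fields replace the dedicated sync kthread machinery (task pointer, work list, list lock, generation counter and wait queue) with self-rescheduling work items. A kernel-style sketch of that conversion pattern, using hypothetical demo_* names rather than the real XFS ones:

#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/workqueue.h>

struct demo_mount {
	struct delayed_work sync_work;	/* replaces task + list + lock */
};

static void demo_sync_worker(struct work_struct *work)
{
	struct demo_mount *dm = container_of(to_delayed_work(work),
					     struct demo_mount, sync_work);

	/* ... one round of background sync for dm ... */

	/* Re-arm; this replaces the kthread's sleep-and-loop body. */
	queue_delayed_work(system_wq, &dm->sync_work,
			   msecs_to_jiffies(30 * 1000));
}

/* Mount and unmount replace kthread_run() and kthread_stop(). */
static void demo_sync_start(struct demo_mount *dm)
{
	INIT_DELAYED_WORK(&dm->sync_work, demo_sync_worker);
	queue_delayed_work(system_wq, &dm->sync_work, 0);
}

static void demo_sync_stop(struct demo_mount *dm)
{
	cancel_delayed_work_sync(&dm->sync_work);
}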
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 9bb6eda4cd21..a595f29567fe 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -382,7 +382,8 @@ static inline int xfs_qm_sync(struct xfs_mount *mp, int flags)
382 xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \ 382 xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \
383 f | XFS_QMOPT_RES_REGBLKS) 383 f | XFS_QMOPT_RES_REGBLKS)
384 384
385extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *); 385extern int xfs_qm_dqcheck(struct xfs_mount *, xfs_disk_dquot_t *,
386 xfs_dqid_t, uint, uint, char *);
386extern int xfs_mount_reset_sbqflags(struct xfs_mount *); 387extern int xfs_mount_reset_sbqflags(struct xfs_mount *);
387 388
388#endif /* __KERNEL__ */ 389#endif /* __KERNEL__ */
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 12a191385310..8f76fdff4f46 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -76,7 +76,7 @@ xfs_growfs_rt_alloc(
76 xfs_mount_t *mp, /* file system mount point */ 76 xfs_mount_t *mp, /* file system mount point */
77 xfs_extlen_t oblocks, /* old count of blocks */ 77 xfs_extlen_t oblocks, /* old count of blocks */
78 xfs_extlen_t nblocks, /* new count of blocks */ 78 xfs_extlen_t nblocks, /* new count of blocks */
79 xfs_ino_t ino) /* inode number (bitmap/summary) */ 79 xfs_inode_t *ip) /* inode (bitmap/summary) */
80{ 80{
81 xfs_fileoff_t bno; /* block number in file */ 81 xfs_fileoff_t bno; /* block number in file */
82 xfs_buf_t *bp; /* temporary buffer for zeroing */ 82 xfs_buf_t *bp; /* temporary buffer for zeroing */
@@ -86,7 +86,6 @@ xfs_growfs_rt_alloc(
86 xfs_fsblock_t firstblock; /* first block allocated in xaction */ 86 xfs_fsblock_t firstblock; /* first block allocated in xaction */
87 xfs_bmap_free_t flist; /* list of freed blocks */ 87 xfs_bmap_free_t flist; /* list of freed blocks */
88 xfs_fsblock_t fsbno; /* filesystem block for bno */ 88 xfs_fsblock_t fsbno; /* filesystem block for bno */
89 xfs_inode_t *ip; /* pointer to incore inode */
90 xfs_bmbt_irec_t map; /* block map output */ 89 xfs_bmbt_irec_t map; /* block map output */
91 int nmap; /* number of block maps */ 90 int nmap; /* number of block maps */
92 int resblks; /* space reservation */ 91 int resblks; /* space reservation */
@@ -112,9 +111,9 @@ xfs_growfs_rt_alloc(
112 /* 111 /*
113 * Lock the inode. 112 * Lock the inode.
114 */ 113 */
115 if ((error = xfs_trans_iget(mp, tp, ino, 0, 114 xfs_ilock(ip, XFS_ILOCK_EXCL);
116 XFS_ILOCK_EXCL, &ip))) 115 xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
117 goto error_cancel; 116
118 xfs_bmap_init(&flist, &firstblock); 117 xfs_bmap_init(&flist, &firstblock);
119 /* 118 /*
120 * Allocate blocks to the bitmap file. 119 * Allocate blocks to the bitmap file.
@@ -155,9 +154,8 @@ xfs_growfs_rt_alloc(
155 /* 154 /*
156 * Lock the bitmap inode. 155 * Lock the bitmap inode.
157 */ 156 */
158 if ((error = xfs_trans_iget(mp, tp, ino, 0, 157 xfs_ilock(ip, XFS_ILOCK_EXCL);
159 XFS_ILOCK_EXCL, &ip))) 158 xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
160 goto error_cancel;
161 /* 159 /*
162 * Get a buffer for the block. 160 * Get a buffer for the block.
163 */ 161 */
@@ -1854,7 +1852,6 @@ xfs_growfs_rt(
1854 xfs_rtblock_t bmbno; /* bitmap block number */ 1852 xfs_rtblock_t bmbno; /* bitmap block number */
1855 xfs_buf_t *bp; /* temporary buffer */ 1853 xfs_buf_t *bp; /* temporary buffer */
1856 int error; /* error return value */ 1854 int error; /* error return value */
1857 xfs_inode_t *ip; /* bitmap inode, used as lock */
1858 xfs_mount_t *nmp; /* new (fake) mount structure */ 1855 xfs_mount_t *nmp; /* new (fake) mount structure */
1859 xfs_drfsbno_t nrblocks; /* new number of realtime blocks */ 1856 xfs_drfsbno_t nrblocks; /* new number of realtime blocks */
1860 xfs_extlen_t nrbmblocks; /* new number of rt bitmap blocks */ 1857 xfs_extlen_t nrbmblocks; /* new number of rt bitmap blocks */
@@ -1918,11 +1915,11 @@ xfs_growfs_rt(
1918 /* 1915 /*
1919 * Allocate space to the bitmap and summary files, as necessary. 1916 * Allocate space to the bitmap and summary files, as necessary.
1920 */ 1917 */
1921 if ((error = xfs_growfs_rt_alloc(mp, rbmblocks, nrbmblocks, 1918 error = xfs_growfs_rt_alloc(mp, rbmblocks, nrbmblocks, mp->m_rbmip);
1922 mp->m_sb.sb_rbmino))) 1919 if (error)
1923 return error; 1920 return error;
1924 if ((error = xfs_growfs_rt_alloc(mp, rsumblocks, nrsumblocks, 1921 error = xfs_growfs_rt_alloc(mp, rsumblocks, nrsumblocks, mp->m_rsumip);
1925 mp->m_sb.sb_rsumino))) 1922 if (error)
1926 return error; 1923 return error;
1927 /* 1924 /*
1928 * Allocate a new (fake) mount/sb. 1925 * Allocate a new (fake) mount/sb.
@@ -1972,10 +1969,8 @@ xfs_growfs_rt(
1972 /* 1969 /*
1973 * Lock out other callers by grabbing the bitmap inode lock. 1970 * Lock out other callers by grabbing the bitmap inode lock.
1974 */ 1971 */
1975 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, 1972 xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
1976 XFS_ILOCK_EXCL, &ip))) 1973 xfs_trans_ijoin_ref(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
1977 goto error_cancel;
1978 ASSERT(ip == mp->m_rbmip);
1979 /* 1974 /*
1980 * Update the bitmap inode's size. 1975 * Update the bitmap inode's size.
1981 */ 1976 */
@@ -1986,10 +1981,8 @@ xfs_growfs_rt(
1986 /* 1981 /*
1987 * Get the summary inode into the transaction. 1982 * Get the summary inode into the transaction.
1988 */ 1983 */
1989 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0, 1984 xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL);
1990 XFS_ILOCK_EXCL, &ip))) 1985 xfs_trans_ijoin_ref(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
1991 goto error_cancel;
1992 ASSERT(ip == mp->m_rsumip);
1993 /* 1986 /*
1994 * Update the summary inode's size. 1987 * Update the summary inode's size.
1995 */ 1988 */
@@ -2075,15 +2068,15 @@ xfs_rtallocate_extent(
2075 xfs_extlen_t prod, /* extent product factor */ 2068 xfs_extlen_t prod, /* extent product factor */
2076 xfs_rtblock_t *rtblock) /* out: start block allocated */ 2069 xfs_rtblock_t *rtblock) /* out: start block allocated */
2077{ 2070{
2071 xfs_mount_t *mp = tp->t_mountp;
2078 int error; /* error value */ 2072 int error; /* error value */
2079 xfs_inode_t *ip; /* inode for bitmap file */
2080 xfs_mount_t *mp; /* file system mount structure */
2081 xfs_rtblock_t r; /* result allocated block */ 2073 xfs_rtblock_t r; /* result allocated block */
2082 xfs_fsblock_t sb; /* summary file block number */ 2074 xfs_fsblock_t sb; /* summary file block number */
2083 xfs_buf_t *sumbp; /* summary file block buffer */ 2075 xfs_buf_t *sumbp; /* summary file block buffer */
2084 2076
2077 ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
2085 ASSERT(minlen > 0 && minlen <= maxlen); 2078 ASSERT(minlen > 0 && minlen <= maxlen);
2086 mp = tp->t_mountp; 2079
2087 /* 2080 /*
2088 * If prod is set then figure out what to do to minlen and maxlen. 2081 * If prod is set then figure out what to do to minlen and maxlen.
2089 */ 2082 */
@@ -2099,12 +2092,7 @@ xfs_rtallocate_extent(
2099 return 0; 2092 return 0;
2100 } 2093 }
2101 } 2094 }
2102 /* 2095
2103 * Lock out other callers by grabbing the bitmap inode lock.
2104 */
2105 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0,
2106 XFS_ILOCK_EXCL, &ip)))
2107 return error;
2108 sumbp = NULL; 2096 sumbp = NULL;
2109 /* 2097 /*
2110 * Allocate by size, or near another block, or exactly at some block. 2098 * Allocate by size, or near another block, or exactly at some block.
@@ -2123,11 +2111,12 @@ xfs_rtallocate_extent(
2123 len, &sumbp, &sb, prod, &r); 2111 len, &sumbp, &sb, prod, &r);
2124 break; 2112 break;
2125 default: 2113 default:
2114 error = EIO;
2126 ASSERT(0); 2115 ASSERT(0);
2127 } 2116 }
2128 if (error) { 2117 if (error)
2129 return error; 2118 return error;
2130 } 2119
2131 /* 2120 /*
2132 * If it worked, update the superblock. 2121 * If it worked, update the superblock.
2133 */ 2122 */
@@ -2155,7 +2144,6 @@ xfs_rtfree_extent(
2155 xfs_extlen_t len) /* length of extent freed */ 2144 xfs_extlen_t len) /* length of extent freed */
2156{ 2145{
2157 int error; /* error value */ 2146 int error; /* error value */
2158 xfs_inode_t *ip; /* bitmap file inode */
2159 xfs_mount_t *mp; /* file system mount structure */ 2147 xfs_mount_t *mp; /* file system mount structure */
2160 xfs_fsblock_t sb; /* summary file block number */ 2148 xfs_fsblock_t sb; /* summary file block number */
2161 xfs_buf_t *sumbp; /* summary file block buffer */ 2149 xfs_buf_t *sumbp; /* summary file block buffer */
@@ -2164,9 +2152,9 @@ xfs_rtfree_extent(
2164 /* 2152 /*
2165 * Synchronize by locking the bitmap inode. 2153 * Synchronize by locking the bitmap inode.
2166 */ 2154 */
2167 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, 2155 xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
2168 XFS_ILOCK_EXCL, &ip))) 2156 xfs_trans_ijoin_ref(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
2169 return error; 2157
2170#if defined(__KERNEL__) && defined(DEBUG) 2158#if defined(__KERNEL__) && defined(DEBUG)
2171 /* 2159 /*
2172 * Check to see that this whole range is currently allocated. 2160 * Check to see that this whole range is currently allocated.
@@ -2199,10 +2187,10 @@ xfs_rtfree_extent(
2199 */ 2187 */
2200 if (tp->t_frextents_delta + mp->m_sb.sb_frextents == 2188 if (tp->t_frextents_delta + mp->m_sb.sb_frextents ==
2201 mp->m_sb.sb_rextents) { 2189 mp->m_sb.sb_rextents) {
2202 if (!(ip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) 2190 if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM))
2203 ip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM; 2191 mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
2204 *(__uint64_t *)&ip->i_d.di_atime = 0; 2192 *(__uint64_t *)&mp->m_rbmip->i_d.di_atime = 0;
2205 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 2193 xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
2206 } 2194 }
2207 return 0; 2195 return 0;
2208} 2196}
@@ -2222,8 +2210,8 @@ xfs_rtmount_init(
2222 if (sbp->sb_rblocks == 0) 2210 if (sbp->sb_rblocks == 0)
2223 return 0; 2211 return 0;
2224 if (mp->m_rtdev_targp == NULL) { 2212 if (mp->m_rtdev_targp == NULL) {
2225 cmn_err(CE_WARN, 2213 xfs_warn(mp,
2226 "XFS: This filesystem has a realtime volume, use rtdev=device option"); 2214 "Filesystem has a realtime volume, use rtdev=device option");
2227 return XFS_ERROR(ENODEV); 2215 return XFS_ERROR(ENODEV);
2228 } 2216 }
2229 mp->m_rsumlevels = sbp->sb_rextslog + 1; 2217 mp->m_rsumlevels = sbp->sb_rextslog + 1;
@@ -2237,7 +2225,7 @@ xfs_rtmount_init(
2237 */ 2225 */
2238 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks); 2226 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
2239 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_rblocks) { 2227 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_rblocks) {
2240 cmn_err(CE_WARN, "XFS: realtime mount -- %llu != %llu", 2228 xfs_warn(mp, "realtime mount -- %llu != %llu",
2241 (unsigned long long) XFS_BB_TO_FSB(mp, d), 2229 (unsigned long long) XFS_BB_TO_FSB(mp, d),
2242 (unsigned long long) mp->m_sb.sb_rblocks); 2230 (unsigned long long) mp->m_sb.sb_rblocks);
2243 return XFS_ERROR(EFBIG); 2231 return XFS_ERROR(EFBIG);
@@ -2246,7 +2234,7 @@ xfs_rtmount_init(
2246 d - XFS_FSB_TO_BB(mp, 1), 2234 d - XFS_FSB_TO_BB(mp, 1),
2247 XFS_FSB_TO_B(mp, 1), 0); 2235 XFS_FSB_TO_B(mp, 1), 0);
2248 if (!bp) { 2236 if (!bp) {
2249 cmn_err(CE_WARN, "XFS: realtime device size check failed"); 2237 xfs_warn(mp, "realtime device size check failed");
2250 return EIO; 2238 return EIO;
2251 } 2239 }
2252 xfs_buf_relse(bp); 2240 xfs_buf_relse(bp);
@@ -2306,20 +2294,16 @@ xfs_rtpick_extent(
2306 xfs_rtblock_t *pick) /* result rt extent */ 2294 xfs_rtblock_t *pick) /* result rt extent */
2307{ 2295{
2308 xfs_rtblock_t b; /* result block */ 2296 xfs_rtblock_t b; /* result block */
2309 int error; /* error return value */
2310 xfs_inode_t *ip; /* bitmap incore inode */
2311 int log2; /* log of sequence number */ 2297 int log2; /* log of sequence number */
2312 __uint64_t resid; /* residual after log removed */ 2298 __uint64_t resid; /* residual after log removed */
2313 __uint64_t seq; /* sequence number of file creation */ 2299 __uint64_t seq; /* sequence number of file creation */
2314 __uint64_t *seqp; /* pointer to seqno in inode */ 2300 __uint64_t *seqp; /* pointer to seqno in inode */
2315 2301
2316 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, 2302 ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
2317 XFS_ILOCK_EXCL, &ip))) 2303
2318 return error; 2304 seqp = (__uint64_t *)&mp->m_rbmip->i_d.di_atime;
2319 ASSERT(ip == mp->m_rbmip); 2305 if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) {
2320 seqp = (__uint64_t *)&ip->i_d.di_atime; 2306 mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
2321 if (!(ip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) {
2322 ip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
2323 *seqp = 0; 2307 *seqp = 0;
2324 } 2308 }
2325 seq = *seqp; 2309 seq = *seqp;
@@ -2335,7 +2319,7 @@ xfs_rtpick_extent(
2335 b = mp->m_sb.sb_rextents - len; 2319 b = mp->m_sb.sb_rextents - len;
2336 } 2320 }
2337 *seqp = seq + 1; 2321 *seqp = seq + 1;
2338 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 2322 xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
2339 *pick = b; 2323 *pick = b;
2340 return 0; 2324 return 0;
2341} 2325}
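Every xfs_trans_iget() removal in this file follows the same shape. Condensed from the hunks above (a fragment, not a standalone unit; it relies on XFS-internal declarations): because the realtime bitmap and summary inodes are cached in the mount structure for the life of the filesystem, a cannot-fail lock-and-join replaces a fallible inode lookup, and the error_cancel bailouts for it disappear.

/* Before: the lookup could fail, so each caller carried an error path. */
if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0,
		XFS_ILOCK_EXCL, &ip)))
	goto error_cancel;

/*
 * After: lock the cached inode and join it to the transaction with a
 * reference; neither call can fail.
 */
xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
xfs_trans_ijoin_ref(tp, mp->m_rbmip, XFS_ILOCK_EXCL);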
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index ff614c29b441..09e1f4f35e97 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -154,7 +154,7 @@ xfs_rtmount_init(
154 if (mp->m_sb.sb_rblocks == 0) 154 if (mp->m_sb.sb_rblocks == 0)
155 return 0; 155 return 0;
156 156
157 cmn_err(CE_WARN, "XFS: Not built with CONFIG_XFS_RT"); 157 xfs_warn(mp, "Not built with CONFIG_XFS_RT");
158 return ENOSYS; 158 return ENOSYS;
159} 159}
160# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS)) 160# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS))
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index 56861d5daaef..d6d6fdfe9422 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -49,9 +49,9 @@ xfs_do_force_shutdown(
49 logerror = flags & SHUTDOWN_LOG_IO_ERROR; 49 logerror = flags & SHUTDOWN_LOG_IO_ERROR;
50 50
51 if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { 51 if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
52 cmn_err(CE_NOTE, "xfs_force_shutdown(%s,0x%x) called from " 52 xfs_notice(mp,
53 "line %d of file %s. Return address = 0x%p", 53 "%s(0x%x) called from line %d of file %s. Return address = 0x%p",
54 mp->m_fsname, flags, lnnum, fname, __return_address); 54 __func__, flags, lnnum, fname, __return_address);
55 } 55 }
56 /* 56 /*
57 * No need to duplicate efforts. 57 * No need to duplicate efforts.
@@ -69,30 +69,25 @@ xfs_do_force_shutdown(
69 return; 69 return;
70 70
71 if (flags & SHUTDOWN_CORRUPT_INCORE) { 71 if (flags & SHUTDOWN_CORRUPT_INCORE) {
72 xfs_cmn_err(XFS_PTAG_SHUTDOWN_CORRUPT, CE_ALERT, mp, 72 xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_CORRUPT,
73 "Corruption of in-memory data detected. Shutting down filesystem: %s", 73 "Corruption of in-memory data detected. Shutting down filesystem");
74 mp->m_fsname); 74 if (XFS_ERRLEVEL_HIGH <= xfs_error_level)
75 if (XFS_ERRLEVEL_HIGH <= xfs_error_level) {
76 xfs_stack_trace(); 75 xfs_stack_trace();
77 }
78 } else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { 76 } else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
79 if (logerror) { 77 if (logerror) {
80 xfs_cmn_err(XFS_PTAG_SHUTDOWN_LOGERROR, CE_ALERT, mp, 78 xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR,
81 "Log I/O Error Detected. Shutting down filesystem: %s", 79 "Log I/O Error Detected. Shutting down filesystem");
82 mp->m_fsname);
83 } else if (flags & SHUTDOWN_DEVICE_REQ) { 80 } else if (flags & SHUTDOWN_DEVICE_REQ) {
84 xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp, 81 xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
85 "All device paths lost. Shutting down filesystem: %s", 82 "All device paths lost. Shutting down filesystem");
86 mp->m_fsname);
87 } else if (!(flags & SHUTDOWN_REMOTE_REQ)) { 83 } else if (!(flags & SHUTDOWN_REMOTE_REQ)) {
88 xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp, 84 xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
89 "I/O Error Detected. Shutting down filesystem: %s", 85 "I/O Error Detected. Shutting down filesystem");
90 mp->m_fsname);
91 } 86 }
92 } 87 }
93 if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { 88 if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
94 cmn_err(CE_ALERT, "Please umount the filesystem, " 89 xfs_alert(mp,
95 "and rectify the problem(s)"); 90 "Please umount the filesystem and rectify the problem(s)");
96 } 91 }
97} 92}
98 93
@@ -106,10 +101,9 @@ xfs_ioerror_alert(
106 xfs_buf_t *bp, 101 xfs_buf_t *bp,
107 xfs_daddr_t blkno) 102 xfs_daddr_t blkno)
108{ 103{
109 cmn_err(CE_ALERT, 104 xfs_alert(mp,
110 "I/O error in filesystem (\"%s\") meta-data dev %s block 0x%llx" 105 "I/O error occurred: meta-data dev %s block 0x%llx"
111 " (\"%s\") error %d buf count %zd", 106 " (\"%s\") error %d buf count %zd",
112 (!mp || !mp->m_fsname) ? "(fs name not set)" : mp->m_fsname,
113 XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), 107 XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
114 (__uint64_t)blkno, func, 108 (__uint64_t)blkno, func,
115 XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp)); 109 XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp));
@@ -173,17 +167,9 @@ xfs_extlen_t
173xfs_get_extsz_hint( 167xfs_get_extsz_hint(
174 struct xfs_inode *ip) 168 struct xfs_inode *ip)
175{ 169{
176 xfs_extlen_t extsz; 170 if ((ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) && ip->i_d.di_extsize)
177 171 return ip->i_d.di_extsize;
178 if (unlikely(XFS_IS_REALTIME_INODE(ip))) { 172 if (XFS_IS_REALTIME_INODE(ip))
179 extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) 173 return ip->i_mount->m_sb.sb_rextsize;
180 ? ip->i_d.di_extsize 174 return 0;
181 : ip->i_mount->m_sb.sb_rextsize;
182 ASSERT(extsz);
183 } else {
184 extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
185 ? ip->i_d.di_extsize : 0;
186 }
187
188 return extsz;
189} 175}
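The rewritten xfs_get_extsz_hint() above collapses a nested conditional into three ordered checks. A small userspace model makes the ordering explicit, including the one visible behavioural shift: a realtime inode with the extent-size flag set but a zero di_extsize now falls back to the realtime extent size, where the old code returned 0 and tripped the debug ASSERT. (Sketch only; names abbreviated, not the kernel function.)

#include <stdbool.h>
#include <stdio.h>

/* Model of the new decision order in xfs_get_extsz_hint(). */
static unsigned int extsz_hint(bool extsize_flag, unsigned int di_extsize,
			       bool realtime, unsigned int rextsize)
{
	if (extsize_flag && di_extsize)
		return di_extsize;	/* explicit per-inode hint wins */
	if (realtime)
		return rextsize;	/* rt files default to the rt extent size */
	return 0;			/* no hint */
}

int main(void)
{
	printf("%u\n", extsz_hint(true, 8, true, 4));	/* 8: hint wins */
	printf("%u\n", extsz_hint(false, 8, true, 4));	/* 4: rt default */
	printf("%u\n", extsz_hint(true, 0, true, 4));	/* 4: was 0 + ASSERT */
	printf("%u\n", extsz_hint(false, 8, false, 4));	/* 0: no hint */
	return 0;
}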
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index c2042b736b81..06a9759b6352 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -469,8 +469,6 @@ void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
469void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *); 469void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *);
470void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint); 470void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
471void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); 471void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
472int xfs_trans_iget(struct xfs_mount *, xfs_trans_t *,
473 xfs_ino_t , uint, uint, struct xfs_inode **);
474void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int); 472void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int);
475void xfs_trans_ijoin_ref(struct xfs_trans *, struct xfs_inode *, uint); 473void xfs_trans_ijoin_ref(struct xfs_trans *, struct xfs_inode *, uint);
476void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *); 474void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *);
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index c5bbbc45db91..acdb92f14d51 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -28,74 +28,138 @@
28#include "xfs_trans_priv.h" 28#include "xfs_trans_priv.h"
29#include "xfs_error.h" 29#include "xfs_error.h"
30 30
31STATIC void xfs_ail_splice(struct xfs_ail *, struct list_head *, xfs_lsn_t); 31struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */
32STATIC void xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *);
33STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *);
34STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *);
35 32
36#ifdef DEBUG 33#ifdef DEBUG
37STATIC void xfs_ail_check(struct xfs_ail *, xfs_log_item_t *); 34/*
38#else 35 * Check that the list is sorted as it should be.
36 */
37STATIC void
38xfs_ail_check(
39 struct xfs_ail *ailp,
40 xfs_log_item_t *lip)
41{
42 xfs_log_item_t *prev_lip;
43
44 if (list_empty(&ailp->xa_ail))
45 return;
46
47 /*
48 * Check the next and previous entries are valid.
49 */
50 ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
51 prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail);
52 if (&prev_lip->li_ail != &ailp->xa_ail)
53 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
54
55 prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail);
56 if (&prev_lip->li_ail != &ailp->xa_ail)
57 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0);
58
59
60#ifdef XFS_TRANS_DEBUG
61 /*
62 * Walk the list checking lsn ordering, and that every entry has the
63 * XFS_LI_IN_AIL flag set. This is really expensive, so only do it
64 * when specifically debugging the transaction subsystem.
65 */
66 prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
67 list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
68 if (&prev_lip->li_ail != &ailp->xa_ail)
69 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
70 ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
71 prev_lip = lip;
72 }
73#endif /* XFS_TRANS_DEBUG */
74}
75#else /* !DEBUG */
39#define xfs_ail_check(a,l) 76#define xfs_ail_check(a,l)
40#endif /* DEBUG */ 77#endif /* DEBUG */
41 78
79/*
80 * Return a pointer to the first item in the AIL. If the AIL is empty, then
81 * return NULL.
82 */
83static xfs_log_item_t *
84xfs_ail_min(
85 struct xfs_ail *ailp)
86{
87 if (list_empty(&ailp->xa_ail))
88 return NULL;
89
90 return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
91}
92
93 /*
94 * Return a pointer to the last item in the AIL. If the AIL is empty, then
95 * return NULL.
96 */
97static xfs_log_item_t *
98xfs_ail_max(
99 struct xfs_ail *ailp)
100{
101 if (list_empty(&ailp->xa_ail))
102 return NULL;
103
104 return list_entry(ailp->xa_ail.prev, xfs_log_item_t, li_ail);
105}
106
107/*
108 * Return a pointer to the item which follows the given item in the AIL. If
109 * the given item is the last item in the list, then return NULL.
110 */
111static xfs_log_item_t *
112xfs_ail_next(
113 struct xfs_ail *ailp,
114 xfs_log_item_t *lip)
115{
116 if (lip->li_ail.next == &ailp->xa_ail)
117 return NULL;
118
119 return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail);
120}
42 121
43/* 122/*
44 * This is called by the log manager code to determine the LSN 123 * This is called by the log manager code to determine the LSN of the tail of
45 * of the tail of the log. This is exactly the LSN of the first 124 * the log. This is exactly the LSN of the first item in the AIL. If the AIL
46 * item in the AIL. If the AIL is empty, then this function 125 * is empty, then this function returns 0.
47 * returns 0.
48 * 126 *
49 * We need the AIL lock in order to get a coherent read of the 127 * We need the AIL lock in order to get a coherent read of the lsn of the last
50 * lsn of the last item in the AIL. 128 * item in the AIL.
51 */ 129 */
52xfs_lsn_t 130xfs_lsn_t
53xfs_trans_ail_tail( 131xfs_ail_min_lsn(
54 struct xfs_ail *ailp) 132 struct xfs_ail *ailp)
55{ 133{
56 xfs_lsn_t lsn; 134 xfs_lsn_t lsn = 0;
57 xfs_log_item_t *lip; 135 xfs_log_item_t *lip;
58 136
59 spin_lock(&ailp->xa_lock); 137 spin_lock(&ailp->xa_lock);
60 lip = xfs_ail_min(ailp); 138 lip = xfs_ail_min(ailp);
61 if (lip == NULL) { 139 if (lip)
62 lsn = (xfs_lsn_t)0;
63 } else {
64 lsn = lip->li_lsn; 140 lsn = lip->li_lsn;
65 }
66 spin_unlock(&ailp->xa_lock); 141 spin_unlock(&ailp->xa_lock);
67 142
68 return lsn; 143 return lsn;
69} 144}
70 145
71/* 146/*
72 * xfs_trans_push_ail 147 * Return the maximum lsn held in the AIL, or zero if the AIL is empty.
73 *
74 * This routine is called to move the tail of the AIL forward. It does this by
75 * trying to flush items in the AIL whose lsns are below the given
76 * threshold_lsn.
77 *
78 * the push is run asynchronously in a separate thread, so we return the tail
79 * of the log right now instead of the tail after the push. This means we will
80 * either continue right away, or we will sleep waiting on the async thread to
81 * do its work.
82 *
83 * We do this unlocked - we only need to know whether there is anything in the
84 * AIL at the time we are called. We don't need to access the contents of
85 * any of the objects, so the lock is not needed.
86 */ 148 */
87void 149static xfs_lsn_t
88xfs_trans_ail_push( 150xfs_ail_max_lsn(
89 struct xfs_ail *ailp, 151 struct xfs_ail *ailp)
90 xfs_lsn_t threshold_lsn)
91{ 152{
92 xfs_log_item_t *lip; 153 xfs_lsn_t lsn = 0;
154 xfs_log_item_t *lip;
93 155
94 lip = xfs_ail_min(ailp); 156 spin_lock(&ailp->xa_lock);
95 if (lip && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) { 157 lip = xfs_ail_max(ailp);
96 if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) 158 if (lip)
97 xfsaild_wakeup(ailp, threshold_lsn); 159 lsn = lip->li_lsn;
98 } 160 spin_unlock(&ailp->xa_lock);
161
162 return lsn;
99} 163}
100 164
101/* 165/*
@@ -236,16 +300,57 @@ out:
236} 300}
237 301
238/* 302/*
239 * xfsaild_push does the work of pushing on the AIL. Returning a timeout of 303 * splice the log item list into the AIL at the given LSN.
240 * zero indicates that the caller should sleep until woken.
241 */ 304 */
242long 305static void
243xfsaild_push( 306xfs_ail_splice(
244 struct xfs_ail *ailp, 307 struct xfs_ail *ailp,
245 xfs_lsn_t *last_lsn) 308 struct list_head *list,
309 xfs_lsn_t lsn)
246{ 310{
247 long tout = 0; 311 xfs_log_item_t *next_lip;
248 xfs_lsn_t last_pushed_lsn = *last_lsn; 312
313 /* If the list is empty, just insert the item. */
314 if (list_empty(&ailp->xa_ail)) {
315 list_splice(list, &ailp->xa_ail);
316 return;
317 }
318
319 list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) {
320 if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)
321 break;
322 }
323
324 ASSERT(&next_lip->li_ail == &ailp->xa_ail ||
325 XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0);
326
327 list_splice_init(list, &next_lip->li_ail);
328}
329
330/*
331 * Delete the given item from the AIL.
332 */
333static void
334xfs_ail_delete(
335 struct xfs_ail *ailp,
336 xfs_log_item_t *lip)
337{
338 xfs_ail_check(ailp, lip);
339 list_del(&lip->li_ail);
340 xfs_trans_ail_cursor_clear(ailp, lip);
341}
342
343/*
344 * xfs_ail_worker does the work of pushing on the AIL. It will requeue itself
345 * to run at a later time if there is more work to do to complete the push.
346 */
347STATIC void
348xfs_ail_worker(
349 struct work_struct *work)
350{
351 struct xfs_ail *ailp = container_of(to_delayed_work(work),
352 struct xfs_ail, xa_work);
353 long tout;
249 xfs_lsn_t target = ailp->xa_target; 354 xfs_lsn_t target = ailp->xa_target;
250 xfs_lsn_t lsn; 355 xfs_lsn_t lsn;
251 xfs_log_item_t *lip; 356 xfs_log_item_t *lip;
@@ -256,15 +361,15 @@ xfsaild_push(
256 361
257 spin_lock(&ailp->xa_lock); 362 spin_lock(&ailp->xa_lock);
258 xfs_trans_ail_cursor_init(ailp, cur); 363 xfs_trans_ail_cursor_init(ailp, cur);
259 lip = xfs_trans_ail_cursor_first(ailp, cur, *last_lsn); 364 lip = xfs_trans_ail_cursor_first(ailp, cur, ailp->xa_last_pushed_lsn);
260 if (!lip || XFS_FORCED_SHUTDOWN(mp)) { 365 if (!lip || XFS_FORCED_SHUTDOWN(mp)) {
261 /* 366 /*
262 * AIL is empty or our push has reached the end. 367 * AIL is empty or our push has reached the end.
263 */ 368 */
264 xfs_trans_ail_cursor_done(ailp, cur); 369 xfs_trans_ail_cursor_done(ailp, cur);
265 spin_unlock(&ailp->xa_lock); 370 spin_unlock(&ailp->xa_lock);
266 *last_lsn = 0; 371 ailp->xa_last_pushed_lsn = 0;
267 return tout; 372 return;
268 } 373 }
269 374
270 XFS_STATS_INC(xs_push_ail); 375 XFS_STATS_INC(xs_push_ail);
@@ -301,13 +406,13 @@ xfsaild_push(
301 case XFS_ITEM_SUCCESS: 406 case XFS_ITEM_SUCCESS:
302 XFS_STATS_INC(xs_push_ail_success); 407 XFS_STATS_INC(xs_push_ail_success);
303 IOP_PUSH(lip); 408 IOP_PUSH(lip);
304 last_pushed_lsn = lsn; 409 ailp->xa_last_pushed_lsn = lsn;
305 break; 410 break;
306 411
307 case XFS_ITEM_PUSHBUF: 412 case XFS_ITEM_PUSHBUF:
308 XFS_STATS_INC(xs_push_ail_pushbuf); 413 XFS_STATS_INC(xs_push_ail_pushbuf);
309 IOP_PUSHBUF(lip); 414 IOP_PUSHBUF(lip);
310 last_pushed_lsn = lsn; 415 ailp->xa_last_pushed_lsn = lsn;
311 push_xfsbufd = 1; 416 push_xfsbufd = 1;
312 break; 417 break;
313 418
@@ -319,7 +424,7 @@ xfsaild_push(
319 424
320 case XFS_ITEM_LOCKED: 425 case XFS_ITEM_LOCKED:
321 XFS_STATS_INC(xs_push_ail_locked); 426 XFS_STATS_INC(xs_push_ail_locked);
322 last_pushed_lsn = lsn; 427 ailp->xa_last_pushed_lsn = lsn;
323 stuck++; 428 stuck++;
324 break; 429 break;
325 430
@@ -374,9 +479,23 @@ xfsaild_push(
374 wake_up_process(mp->m_ddev_targp->bt_task); 479 wake_up_process(mp->m_ddev_targp->bt_task);
375 } 480 }
376 481
482 /* assume we have more work to do in a short while */
483 tout = 10;
377 if (!count) { 484 if (!count) {
378 /* We're past our target or empty, so idle */ 485 /* We're past our target or empty, so idle */
379 last_pushed_lsn = 0; 486 ailp->xa_last_pushed_lsn = 0;
487
488 /*
489 * Check for an updated push target before clearing the
490 * XFS_AIL_PUSHING_BIT. If the target changed, we've got more
491 * work to do. Wait a bit longer before starting that work.
492 */
493 smp_rmb();
494 if (ailp->xa_target == target) {
495 clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags);
496 return;
497 }
498 tout = 50;
380 } else if (XFS_LSN_CMP(lsn, target) >= 0) { 499 } else if (XFS_LSN_CMP(lsn, target) >= 0) {
381 /* 500 /*
382 * We reached the target so wait a bit longer for I/O to 501 * We reached the target so wait a bit longer for I/O to
@@ -384,7 +503,7 @@ xfsaild_push(
384 * start the next scan from the start of the AIL. 503 * start the next scan from the start of the AIL.
385 */ 504 */
386 tout = 50; 505 tout = 50;
387 last_pushed_lsn = 0; 506 ailp->xa_last_pushed_lsn = 0;
388 } else if ((stuck * 100) / count > 90) { 507 } else if ((stuck * 100) / count > 90) {
389 /* 508 /*
390 * Either there is a lot of contention on the AIL or we 509 * Either there is a lot of contention on the AIL or we
@@ -396,14 +515,61 @@ xfsaild_push(
396 * continuing from where we were. 515 * continuing from where we were.
397 */ 516 */
398 tout = 20; 517 tout = 20;
399 } else {
400 /* more to do, but wait a short while before continuing */
401 tout = 10;
402 } 518 }
403 *last_lsn = last_pushed_lsn; 519
404 return tout; 520 /* There is more to do, requeue us. */
521 queue_delayed_work(xfs_syncd_wq, &ailp->xa_work,
522 msecs_to_jiffies(tout));
523}
524
525/*
526 * This routine is called to move the tail of the AIL forward. It does this by
527 * trying to flush items in the AIL whose lsns are below the given
528 * threshold_lsn.
529 *
530 * The push is run asynchronously in a workqueue, which means the caller needs
531 * to handle waiting on the async flush for space to become available.
532 * We don't want to interrupt any push that is in progress, hence we only queue
533 * work if we set the pushing bit appropriately.
534 *
535 * We do this unlocked - we only need to know whether there is anything in the
536 * AIL at the time we are called. We don't need to access the contents of
537 * any of the objects, so the lock is not needed.
538 */
539void
540xfs_ail_push(
541 struct xfs_ail *ailp,
542 xfs_lsn_t threshold_lsn)
543{
544 xfs_log_item_t *lip;
545
546 lip = xfs_ail_min(ailp);
547 if (!lip || XFS_FORCED_SHUTDOWN(ailp->xa_mount) ||
548 XFS_LSN_CMP(threshold_lsn, ailp->xa_target) <= 0)
549 return;
550
551 /*
552 * Ensure that the new target is noticed in push code before it clears
553 * the XFS_AIL_PUSHING_BIT.
554 */
555 smp_wmb();
556 ailp->xa_target = threshold_lsn;
557 if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags))
558 queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0);
405} 559}
406 560
561/*
562 * Push out all items in the AIL immediately
563 */
564void
565xfs_ail_push_all(
566 struct xfs_ail *ailp)
567{
568 xfs_lsn_t threshold_lsn = xfs_ail_max_lsn(ailp);
569
570 if (threshold_lsn)
571 xfs_ail_push(ailp, threshold_lsn);
572}
407 573
408/* 574/*
409 * This is to be called when an item is unlocked that may have 575 * This is to be called when an item is unlocked that may have
@@ -563,7 +729,7 @@ xfs_trans_ail_delete_bulk(
563 729
564 spin_unlock(&ailp->xa_lock); 730 spin_unlock(&ailp->xa_lock);
565 if (!XFS_FORCED_SHUTDOWN(mp)) { 731 if (!XFS_FORCED_SHUTDOWN(mp)) {
566 xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, 732 xfs_alert_tag(mp, XFS_PTAG_AILDELETE,
567 "%s: attempting to delete a log item that is not in the AIL", 733 "%s: attempting to delete a log item that is not in the AIL",
568 __func__); 734 __func__);
569 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 735 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
@@ -615,7 +781,6 @@ xfs_trans_ail_init(
615 xfs_mount_t *mp) 781 xfs_mount_t *mp)
616{ 782{
617 struct xfs_ail *ailp; 783 struct xfs_ail *ailp;
618 int error;
619 784
620 ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL); 785 ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL);
621 if (!ailp) 786 if (!ailp)
@@ -624,15 +789,9 @@ xfs_trans_ail_init(
624 ailp->xa_mount = mp; 789 ailp->xa_mount = mp;
625 INIT_LIST_HEAD(&ailp->xa_ail); 790 INIT_LIST_HEAD(&ailp->xa_ail);
626 spin_lock_init(&ailp->xa_lock); 791 spin_lock_init(&ailp->xa_lock);
627 error = xfsaild_start(ailp); 792 INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker);
628 if (error)
629 goto out_free_ailp;
630 mp->m_ail = ailp; 793 mp->m_ail = ailp;
631 return 0; 794 return 0;
632
633out_free_ailp:
634 kmem_free(ailp);
635 return error;
636} 795}
637 796
638void 797void
@@ -641,124 +800,6 @@ xfs_trans_ail_destroy(
641{ 800{
642 struct xfs_ail *ailp = mp->m_ail; 801 struct xfs_ail *ailp = mp->m_ail;
643 802
644 xfsaild_stop(ailp); 803 cancel_delayed_work_sync(&ailp->xa_work);
645 kmem_free(ailp); 804 kmem_free(ailp);
646} 805}
647
648/*
649 * splice the log item list into the AIL at the given LSN.
650 */
651STATIC void
652xfs_ail_splice(
653 struct xfs_ail *ailp,
654 struct list_head *list,
655 xfs_lsn_t lsn)
656{
657 xfs_log_item_t *next_lip;
658
659 /*
660 * If the list is empty, just insert the item.
661 */
662 if (list_empty(&ailp->xa_ail)) {
663 list_splice(list, &ailp->xa_ail);
664 return;
665 }
666
667 list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) {
668 if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)
669 break;
670 }
671
672 ASSERT((&next_lip->li_ail == &ailp->xa_ail) ||
673 (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0));
674
675 list_splice_init(list, &next_lip->li_ail);
676 return;
677}
678
679/*
680 * Delete the given item from the AIL. Return a pointer to the item.
681 */
682STATIC void
683xfs_ail_delete(
684 struct xfs_ail *ailp,
685 xfs_log_item_t *lip)
686{
687 xfs_ail_check(ailp, lip);
688 list_del(&lip->li_ail);
689 xfs_trans_ail_cursor_clear(ailp, lip);
690}
691
692/*
693 * Return a pointer to the first item in the AIL.
694 * If the AIL is empty, then return NULL.
695 */
696STATIC xfs_log_item_t *
697xfs_ail_min(
698 struct xfs_ail *ailp)
699{
700 if (list_empty(&ailp->xa_ail))
701 return NULL;
702
703 return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
704}
705
706/*
707 * Return a pointer to the item which follows
708 * the given item in the AIL. If the given item
709 * is the last item in the list, then return NULL.
710 */
711STATIC xfs_log_item_t *
712xfs_ail_next(
713 struct xfs_ail *ailp,
714 xfs_log_item_t *lip)
715{
716 if (lip->li_ail.next == &ailp->xa_ail)
717 return NULL;
718
719 return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail);
720}
721
722#ifdef DEBUG
723/*
724 * Check that the list is sorted as it should be.
725 */
726STATIC void
727xfs_ail_check(
728 struct xfs_ail *ailp,
729 xfs_log_item_t *lip)
730{
731 xfs_log_item_t *prev_lip;
732
733 if (list_empty(&ailp->xa_ail))
734 return;
735
736 /*
737 * Check the next and previous entries are valid.
738 */
739 ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
740 prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail);
741 if (&prev_lip->li_ail != &ailp->xa_ail)
742 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
743
744 prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail);
745 if (&prev_lip->li_ail != &ailp->xa_ail)
746 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0);
747
748
749#ifdef XFS_TRANS_DEBUG
750 /*
751 * Walk the list checking lsn ordering, and that every entry has the
752 * XFS_LI_IN_AIL flag set. This is really expensive, so only do it
753 * when specifically debugging the transaction subsystem.
754 */
755 prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
756 list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
757 if (&prev_lip->li_ail != &ailp->xa_ail)
758 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
759 ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
760 prev_lip = lip;
761 }
762#endif /* XFS_TRANS_DEBUG */
763}
764#endif /* DEBUG */
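The xa_target/XFS_AIL_PUSHING_BIT handshake above deserves a closer look: the pusher publishes the new target before trying to take the bit, and the idle worker re-reads the target after its final scan and only clears the bit if it has not moved. An idealized userspace model of that intent in C11 atomics (the kernel code uses smp_wmb()/smp_rmb() and test_and_set_bit() instead; this is a sketch, not a claim about the exact kernel memory ordering):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Userspace model of the xa_target / XFS_AIL_PUSHING_BIT handshake. */
static _Atomic long xa_target;
static atomic_flag pushing = ATOMIC_FLAG_INIT;

/* Pusher: publish the new target, then try to take the pushing bit. */
static void ail_push_model(long threshold_lsn)
{
	atomic_store_explicit(&xa_target, threshold_lsn,
			      memory_order_release);
	if (!atomic_flag_test_and_set_explicit(&pushing,
					       memory_order_acq_rel)) {
		/* queue_delayed_work(...) would go here */
		printf("worker queued for target %ld\n", threshold_lsn);
	}
}

/*
 * Worker, on finding nothing left to push: only go idle if the target
 * has not moved since the scan started; otherwise keep the bit and
 * requeue (the kernel code uses tout = 50 for this case).
 */
static bool ail_worker_idle_model(long target_seen)
{
	if (atomic_load_explicit(&xa_target,
				 memory_order_acquire) == target_seen) {
		atomic_flag_clear_explicit(&pushing, memory_order_release);
		return true;
	}
	return false;
}

int main(void)
{
	ail_push_model(100);
	printf("idle: %d\n", ail_worker_idle_model(100));	/* 1 */
	return 0;
}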
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index c47918c302a5..03b3b7f85a3b 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -305,7 +305,7 @@ xfs_trans_read_buf(
305 if (xfs_error_target == target) { 305 if (xfs_error_target == target) {
306 if (((xfs_req_num++) % xfs_error_mod) == 0) { 306 if (((xfs_req_num++) % xfs_error_mod) == 0) {
307 xfs_buf_relse(bp); 307 xfs_buf_relse(bp);
308 cmn_err(CE_DEBUG, "Returning error!\n"); 308 xfs_debug(mp, "Returning error!");
309 return XFS_ERROR(EIO); 309 return XFS_ERROR(EIO);
310 } 310 }
311 } 311 }
@@ -383,7 +383,8 @@ xfs_trans_read_buf(
383 bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK); 383 bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK);
384 if (bp == NULL) { 384 if (bp == NULL) {
385 *bpp = NULL; 385 *bpp = NULL;
386 return 0; 386 return (flags & XBF_TRYLOCK) ?
387 0 : XFS_ERROR(ENOMEM);
387 } 388 }
388 if (XFS_BUF_GETERROR(bp) != 0) { 389 if (XFS_BUF_GETERROR(bp) != 0) {
389 XFS_BUF_SUPER_STALE(bp); 390 XFS_BUF_SUPER_STALE(bp);
@@ -403,7 +404,7 @@ xfs_trans_read_buf(
403 xfs_force_shutdown(tp->t_mountp, 404 xfs_force_shutdown(tp->t_mountp,
404 SHUTDOWN_META_IO_ERROR); 405 SHUTDOWN_META_IO_ERROR);
405 xfs_buf_relse(bp); 406 xfs_buf_relse(bp);
406 cmn_err(CE_DEBUG, "Returning trans error!\n"); 407 xfs_debug(mp, "Returning trans error!");
407 return XFS_ERROR(EIO); 408 return XFS_ERROR(EIO);
408 } 409 }
409 } 410 }
@@ -427,7 +428,7 @@ shutdown_abort:
427 */ 428 */
428#if defined(DEBUG) 429#if defined(DEBUG)
429 if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp)) 430 if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp))
430 cmn_err(CE_NOTE, "about to pop assert, bp == 0x%p", bp); 431 xfs_notice(mp, "about to pop assert, bp == 0x%p", bp);
431#endif 432#endif
432 ASSERT((XFS_BUF_BFLAGS(bp) & (XBF_STALE|XBF_DELWRI)) != 433 ASSERT((XFS_BUF_BFLAGS(bp) & (XBF_STALE|XBF_DELWRI)) !=
433 (XBF_STALE|XBF_DELWRI)); 434 (XBF_STALE|XBF_DELWRI));
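The xfs_trans_read_buf() change above stops reporting success with a NULL buffer for blocking reads; only XBF_TRYLOCK callers still get the 0/NULL combination. The caller-side consequence, as a fragment (XFS-internal declarations assumed, argument list taken from the surrounding context):

error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, blkno, len, 0, &bp);
if (error)
	return error;		/* now includes ENOMEM for a failed buffer get */
ASSERT(bp != NULL);		/* guaranteed for blocking (non-TRYLOCK) reads */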
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index ccb34532768b..048b0c689d3e 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -44,28 +44,6 @@ xfs_trans_inode_broot_debug(
44#endif 44#endif
45 45
46/* 46/*
47 * Get an inode and join it to the transaction.
48 */
49int
50xfs_trans_iget(
51 xfs_mount_t *mp,
52 xfs_trans_t *tp,
53 xfs_ino_t ino,
54 uint flags,
55 uint lock_flags,
56 xfs_inode_t **ipp)
57{
58 int error;
59
60 error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp);
61 if (!error && tp) {
62 xfs_trans_ijoin(tp, *ipp);
63 (*ipp)->i_itemp->ili_lock_flags = lock_flags;
64 }
65 return error;
66}
67
68/*
69 * Add a locked inode to the transaction. 47 * Add a locked inode to the transaction.
70 * 48 *
71 * The inode must be locked, and it cannot be associated with any transaction. 49 * The inode must be locked, and it cannot be associated with any transaction.
@@ -103,7 +81,7 @@ xfs_trans_ijoin(
103 * 81 *
104 * 82 *
105 * Grabs a reference to the inode which will be dropped when the transaction 83 * Grabs a reference to the inode which will be dropped when the transaction
106 * is commited. The inode will also be unlocked at that point. The inode 84 * is committed. The inode will also be unlocked at that point. The inode
107 * must be locked, and it cannot be associated with any transaction. 85 * must be locked, and it cannot be associated with any transaction.
108 */ 86 */
109void 87void
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 35162c238fa3..6b164e9e9a1f 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -65,16 +65,22 @@ struct xfs_ail_cursor {
65struct xfs_ail { 65struct xfs_ail {
66 struct xfs_mount *xa_mount; 66 struct xfs_mount *xa_mount;
67 struct list_head xa_ail; 67 struct list_head xa_ail;
68 uint xa_gen;
69 struct task_struct *xa_task;
70 xfs_lsn_t xa_target; 68 xfs_lsn_t xa_target;
71 struct xfs_ail_cursor xa_cursors; 69 struct xfs_ail_cursor xa_cursors;
72 spinlock_t xa_lock; 70 spinlock_t xa_lock;
71 struct delayed_work xa_work;
72 xfs_lsn_t xa_last_pushed_lsn;
73 unsigned long xa_flags;
73}; 74};
74 75
76#define XFS_AIL_PUSHING_BIT 0
77
75/* 78/*
76 * From xfs_trans_ail.c 79 * From xfs_trans_ail.c
77 */ 80 */
81
82extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */
83
78void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, 84void xfs_trans_ail_update_bulk(struct xfs_ail *ailp,
79 struct xfs_log_item **log_items, int nr_items, 85 struct xfs_log_item **log_items, int nr_items,
80 xfs_lsn_t lsn) __releases(ailp->xa_lock); 86 xfs_lsn_t lsn) __releases(ailp->xa_lock);
@@ -98,12 +104,13 @@ xfs_trans_ail_delete(
98 xfs_trans_ail_delete_bulk(ailp, &lip, 1); 104 xfs_trans_ail_delete_bulk(ailp, &lip, 1);
99} 105}
100 106
101void xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t); 107void xfs_ail_push(struct xfs_ail *, xfs_lsn_t);
108void xfs_ail_push_all(struct xfs_ail *);
109xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp);
110
102void xfs_trans_unlocked_item(struct xfs_ail *, 111void xfs_trans_unlocked_item(struct xfs_ail *,
103 xfs_log_item_t *); 112 xfs_log_item_t *);
104 113
105xfs_lsn_t xfs_trans_ail_tail(struct xfs_ail *ailp);
106
107struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp, 114struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
108 struct xfs_ail_cursor *cur, 115 struct xfs_ail_cursor *cur,
109 xfs_lsn_t lsn); 116 xfs_lsn_t lsn);
@@ -112,11 +119,6 @@ struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
112void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, 119void xfs_trans_ail_cursor_done(struct xfs_ail *ailp,
113 struct xfs_ail_cursor *cur); 120 struct xfs_ail_cursor *cur);
114 121
115long xfsaild_push(struct xfs_ail *, xfs_lsn_t *);
116void xfsaild_wakeup(struct xfs_ail *, xfs_lsn_t);
117int xfsaild_start(struct xfs_ail *);
118void xfsaild_stop(struct xfs_ail *);
119
120#if BITS_PER_LONG != 64 122#if BITS_PER_LONG != 64
121static inline void 123static inline void
122xfs_trans_ail_copy_lsn( 124xfs_trans_ail_copy_lsn(
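The header now exports the reshaped AIL interface: xfs_trans_ail_push() and xfs_trans_ail_tail() are gone, replaced by xfs_ail_push(), xfs_ail_push_all() and xfs_ail_min_lsn(), and the xfsaild_* thread entry points disappear entirely. A usage fragment (callers live outside this section; mp and threshold_lsn are assumed):

xfs_lsn_t tail_lsn;

tail_lsn = xfs_ail_min_lsn(mp->m_ail);		/* was xfs_trans_ail_tail() */

xfs_ail_push(mp->m_ail, threshold_lsn);		/* push items below a target */
xfs_ail_push_all(mp->m_ail);			/* push everything in the AIL */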
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index d8e6f8cd6f0c..b7a5fe7c52c8 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -953,7 +953,7 @@ xfs_release(
953 * If we previously truncated this file and removed old data 953 * If we previously truncated this file and removed old data
954 * in the process, we want to initiate "early" writeout on 954 * in the process, we want to initiate "early" writeout on
955 * the last close. This is an attempt to combat the notorious 955 * the last close. This is an attempt to combat the notorious
956 * NULL files problem which is particularly noticable from a 956 * NULL files problem which is particularly noticeable from a
957 * truncate down, buffered (re-)write (delalloc), followed by 957 * truncate down, buffered (re-)write (delalloc), followed by
958 * a crash. What we are effectively doing here is 958 * a crash. What we are effectively doing here is
959 * significantly reducing the time window where we'd otherwise 959 * significantly reducing the time window where we'd otherwise
@@ -982,7 +982,7 @@ xfs_release(
982 * 982 *
983 * Further, check if the inode is being opened, written and 983 * Further, check if the inode is being opened, written and
984 * closed frequently and we have delayed allocation blocks 984 * closed frequently and we have delayed allocation blocks
985 * oustanding (e.g. streaming writes from the NFS server), 985 * outstanding (e.g. streaming writes from the NFS server),
986 * truncating the blocks past EOF will cause fragmentation to 986 * truncating the blocks past EOF will cause fragmentation to
987 * occur. 987 * occur.
988 * 988 *
@@ -1189,9 +1189,8 @@ xfs_inactive(
1189 * inode might be lost for a long time or forever. 1189 * inode might be lost for a long time or forever.
1190 */ 1190 */
1191 if (!XFS_FORCED_SHUTDOWN(mp)) { 1191 if (!XFS_FORCED_SHUTDOWN(mp)) {
1192 cmn_err(CE_NOTE, 1192 xfs_notice(mp, "%s: xfs_ifree returned error %d",
1193 "xfs_inactive: xfs_ifree() returned an error = %d on %s", 1193 __func__, error);
1194 error, mp->m_fsname);
1195 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); 1194 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
1196 } 1195 }
1197 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); 1196 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
@@ -1208,12 +1207,12 @@ xfs_inactive(
1208 */ 1207 */
1209 error = xfs_bmap_finish(&tp, &free_list, &committed); 1208 error = xfs_bmap_finish(&tp, &free_list, &committed);
1210 if (error) 1209 if (error)
1211 xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: " 1210 xfs_notice(mp, "%s: xfs_bmap_finish returned error %d",
1212 "xfs_bmap_finish() returned error %d", error); 1211 __func__, error);
1213 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1212 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1214 if (error) 1213 if (error)
1215 xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: " 1214 xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
1216 "xfs_trans_commit() returned error %d", error); 1215 __func__, error);
1217 } 1216 }
1218 1217
1219 /* 1218 /*
@@ -1310,7 +1309,7 @@ xfs_create(
1310 error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, 1309 error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
1311 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); 1310 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
1312 if (error) 1311 if (error)
1313 goto std_return; 1312 return error;
1314 1313
1315 if (is_dir) { 1314 if (is_dir) {
1316 rdev = 0; 1315 rdev = 0;
@@ -1390,12 +1389,6 @@ xfs_create(
1390 } 1389 }
1391 1390
1392 /* 1391 /*
1393 * At this point, we've gotten a newly allocated inode.
1394 * It is locked (and joined to the transaction).
1395 */
1396 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1397
1398 /*
1399 * Now we join the directory inode to the transaction. We do not do it 1392 * Now we join the directory inode to the transaction. We do not do it
1400 * earlier because xfs_dir_ialloc might commit the previous transaction 1393 * earlier because xfs_dir_ialloc might commit the previous transaction
1401 * (and release all the locks). An error from here on will result in 1394 * (and release all the locks). An error from here on will result in
@@ -1440,22 +1433,13 @@ xfs_create(
1440 */ 1433 */
1441 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp); 1434 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);
1442 1435
1443 /*
1444 * xfs_trans_commit normally decrements the vnode ref count
1445 * when it unlocks the inode. Since we want to return the
1446 * vnode to the caller, we bump the vnode ref count now.
1447 */
1448 IHOLD(ip);
1449
1450 error = xfs_bmap_finish(&tp, &free_list, &committed); 1436 error = xfs_bmap_finish(&tp, &free_list, &committed);
1451 if (error) 1437 if (error)
1452 goto out_abort_rele; 1438 goto out_bmap_cancel;
1453 1439
1454 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1440 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1455 if (error) { 1441 if (error)
1456 IRELE(ip); 1442 goto out_release_inode;
1457 goto out_dqrele;
1458 }
1459 1443
1460 xfs_qm_dqrele(udqp); 1444 xfs_qm_dqrele(udqp);
1461 xfs_qm_dqrele(gdqp); 1445 xfs_qm_dqrele(gdqp);
@@ -1469,27 +1453,21 @@ xfs_create(
1469 cancel_flags |= XFS_TRANS_ABORT; 1453 cancel_flags |= XFS_TRANS_ABORT;
1470 out_trans_cancel: 1454 out_trans_cancel:
1471 xfs_trans_cancel(tp, cancel_flags); 1455 xfs_trans_cancel(tp, cancel_flags);
1472 out_dqrele: 1456 out_release_inode:
1457 /*
1458 * Wait until after the current transaction is aborted to
1459 * release the inode. This prevents recursive transactions
1460 * and deadlocks from xfs_inactive.
1461 */
1462 if (ip)
1463 IRELE(ip);
1464
1473 xfs_qm_dqrele(udqp); 1465 xfs_qm_dqrele(udqp);
1474 xfs_qm_dqrele(gdqp); 1466 xfs_qm_dqrele(gdqp);
1475 1467
1476 if (unlock_dp_on_error) 1468 if (unlock_dp_on_error)
1477 xfs_iunlock(dp, XFS_ILOCK_EXCL); 1469 xfs_iunlock(dp, XFS_ILOCK_EXCL);
1478 std_return:
1479 return error; 1470 return error;
1480
1481 out_abort_rele:
1482 /*
1483 * Wait until after the current transaction is aborted to
1484 * release the inode. This prevents recursive transactions
1485 * and deadlocks from xfs_inactive.
1486 */
1487 xfs_bmap_cancel(&free_list);
1488 cancel_flags |= XFS_TRANS_ABORT;
1489 xfs_trans_cancel(tp, cancel_flags);
1490 IRELE(ip);
1491 unlock_dp_on_error = B_FALSE;
1492 goto out_dqrele;
1493} 1471}
1494 1472
1495#ifdef DEBUG 1473#ifdef DEBUG
@@ -2114,9 +2092,8 @@ xfs_symlink(
2114 XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, 2092 XFS_BMAPI_WRITE | XFS_BMAPI_METADATA,
2115 &first_block, resblks, mval, &nmaps, 2093 &first_block, resblks, mval, &nmaps,
2116 &free_list); 2094 &free_list);
2117 if (error) { 2095 if (error)
2118 goto error1; 2096 goto error2;
2119 }
2120 2097
2121 if (resblks) 2098 if (resblks)
2122 resblks -= fs_blocks; 2099 resblks -= fs_blocks;
@@ -2148,7 +2125,7 @@ xfs_symlink(
2148 error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, 2125 error = xfs_dir_createname(tp, dp, link_name, ip->i_ino,
2149 &first_block, &free_list, resblks); 2126 &first_block, &free_list, resblks);
2150 if (error) 2127 if (error)
2151 goto error1; 2128 goto error2;
2152 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2129 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2153 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 2130 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
2154 2131
@@ -2161,13 +2138,6 @@ xfs_symlink(
2161 xfs_trans_set_sync(tp); 2138 xfs_trans_set_sync(tp);
2162 } 2139 }
2163 2140
2164 /*
2165 * xfs_trans_commit normally decrements the vnode ref count
2166 * when it unlocks the inode. Since we want to return the
2167 * vnode to the caller, we bump the vnode ref count now.
2168 */
2169 IHOLD(ip);
2170
2171 error = xfs_bmap_finish(&tp, &free_list, &committed); 2141 error = xfs_bmap_finish(&tp, &free_list, &committed);
2172 if (error) { 2142 if (error) {
2173 goto error2; 2143 goto error2;
@@ -2861,7 +2831,8 @@ xfs_change_file_space(
2861 ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; 2831 ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;
2862 2832
2863 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 2833 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
2864 xfs_trans_set_sync(tp); 2834 if (attr_flags & XFS_ATTR_SYNC)
2835 xfs_trans_set_sync(tp);
2865 2836
2866 error = xfs_trans_commit(tp, 0); 2837 error = xfs_trans_commit(tp, 0);
2867 2838
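The xfs_create() rework above replaces the special-cased out_abort_rele tail with one label per resource, unwound in reverse acquisition order, and drops the IHOLD/IRELE juggling around the commit. The resulting error-path shape, condensed from the hunks (fragment only; label placement abbreviated):

out_bmap_cancel:
	xfs_bmap_cancel(&free_list);
	cancel_flags |= XFS_TRANS_ABORT;
out_trans_cancel:
	xfs_trans_cancel(tp, cancel_flags);
out_release_inode:
	/*
	 * Release the inode only after the transaction is aborted, to
	 * avoid recursive transactions and deadlocks from xfs_inactive.
	 */
	if (ip)
		IRELE(ip);
	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);
	if (unlock_dp_on_error)
		xfs_iunlock(dp, XFS_ILOCK_EXCL);
	return error;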
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index f6702927eee4..3bcd23353d6c 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -18,6 +18,7 @@ int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags);
18#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */ 18#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */
19#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */ 19#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */
20#define XFS_ATTR_NOACL 0x08 /* Don't call xfs_acl_chmod */ 20#define XFS_ATTR_NOACL 0x08 /* Don't call xfs_acl_chmod */
21#define XFS_ATTR_SYNC 0x10 /* synchronous operation required */
21 22
22int xfs_readlink(struct xfs_inode *ip, char *link); 23int xfs_readlink(struct xfs_inode *ip, char *link);
23int xfs_release(struct xfs_inode *ip); 24int xfs_release(struct xfs_inode *ip);
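The new XFS_ATTR_SYNC flag pairs with the xfs_change_file_space() hunk above: the transaction is forced synchronous only when the caller asks for it, instead of unconditionally. A hypothetical caller-side fragment (the matching ioctl change is not part of this section, so the mapping from file flags shown here is an assumption, not the actual patch):

int attr_flags = 0;

if (filp->f_flags & (O_DSYNC | O_SYNC))	/* assumed trigger for sync */
	attr_flags |= XFS_ATTR_SYNC;

error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos, attr_flags);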