aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/Makefile4
-rw-r--r--fs/9p/fid.c111
-rw-r--r--fs/9p/v9fs.c3
-rw-r--r--fs/9p/v9fs.h1
-rw-r--r--fs/9p/v9fs_vfs.h3
-rw-r--r--fs/9p/vfs_dir.c134
-rw-r--r--fs/9p/vfs_file.c26
-rw-r--r--fs/9p/vfs_inode.c770
-rw-r--r--fs/9p/vfs_super.c54
-rw-r--r--fs/9p/xattr.c160
-rw-r--r--fs/9p/xattr.h27
-rw-r--r--fs/9p/xattr_user.c80
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/adfs/inode.c16
-rw-r--r--fs/affs/affs.h3
-rw-r--r--fs/affs/file.c11
-rw-r--r--fs/affs/inode.c38
-rw-r--r--fs/affs/super.c32
-rw-r--r--fs/afs/Kconfig1
-rw-r--r--fs/afs/cell.c40
-rw-r--r--fs/afs/inode.c5
-rw-r--r--fs/afs/internal.h2
-rw-r--r--fs/afs/main.c9
-rw-r--r--fs/afs/super.c2
-rw-r--r--fs/aio.c21
-rw-r--r--fs/attr.c88
-rw-r--r--fs/befs/linuxvfs.c2
-rw-r--r--fs/bfs/bfs.h1
-rw-r--r--fs/bfs/file.c17
-rw-r--r--fs/bfs/inode.c116
-rw-r--r--fs/binfmt_misc.c5
-rw-r--r--fs/block_dev.c21
-rw-r--r--fs/btrfs/ctree.h4
-rw-r--r--fs/btrfs/inode.c34
-rw-r--r--fs/btrfs/super.c2
-rw-r--r--fs/buffer.c180
-rw-r--r--fs/cachefiles/bind.c2
-rw-r--r--fs/cachefiles/daemon.c6
-rw-r--r--fs/cachefiles/namei.c13
-rw-r--r--fs/cachefiles/rdwr.c4
-rw-r--r--fs/ceph/Kconfig2
-rw-r--r--fs/ceph/caps.c15
-rw-r--r--fs/ceph/dir.c13
-rw-r--r--fs/ceph/file.c2
-rw-r--r--fs/ceph/inode.c6
-rw-r--r--fs/ceph/mds_client.c10
-rw-r--r--fs/ceph/mon_client.c6
-rw-r--r--fs/ceph/osd_client.c6
-rw-r--r--fs/ceph/osdmap.c26
-rw-r--r--fs/char_dev.c1
-rw-r--r--fs/cifs/Kconfig27
-rw-r--r--fs/cifs/Makefile2
-rw-r--r--fs/cifs/README5
-rw-r--r--fs/cifs/cache.c331
-rw-r--r--fs/cifs/cifs_debug.c25
-rw-r--r--fs/cifs/cifs_dfs_ref.c33
-rw-r--r--fs/cifs/cifs_fs_sb.h1
-rw-r--r--fs/cifs/cifs_spnego.c7
-rw-r--r--fs/cifs/cifsfs.c44
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h48
-rw-r--r--fs/cifs/cifsproto.h4
-rw-r--r--fs/cifs/connect.c181
-rw-r--r--fs/cifs/dir.c8
-rw-r--r--fs/cifs/dns_resolve.c231
-rw-r--r--fs/cifs/dns_resolve.h2
-rw-r--r--fs/cifs/file.c104
-rw-r--r--fs/cifs/fscache.c236
-rw-r--r--fs/cifs/fscache.h136
-rw-r--r--fs/cifs/inode.c149
-rw-r--r--fs/cifs/ioctl.c3
-rw-r--r--fs/cifs/misc.c20
-rw-r--r--fs/cifs/netmisc.c63
-rw-r--r--fs/cifs/readdir.c5
-rw-r--r--fs/cifs/smberr.h1
-rw-r--r--fs/coda/inode.c8
-rw-r--r--fs/compat.c22
-rw-r--r--fs/compat_ioctl.c11
-rw-r--r--fs/cramfs/inode.c88
-rw-r--r--fs/dcache.c39
-rw-r--r--fs/direct-io.c100
-rw-r--r--fs/dlm/lowcomms.c2
-rw-r--r--fs/dlm/netlink.c15
-rw-r--r--fs/drop_caches.c2
-rw-r--r--fs/ecryptfs/crypto.c2
-rw-r--r--fs/ecryptfs/inode.c18
-rw-r--r--fs/ecryptfs/messaging.c17
-rw-r--r--fs/ecryptfs/super.c14
-rw-r--r--fs/exec.c8
-rw-r--r--fs/exofs/exofs.h3
-rw-r--r--fs/exofs/file.c1
-rw-r--r--fs/exofs/inode.c127
-rw-r--r--fs/exofs/super.c2
-rw-r--r--fs/ext2/balloc.c11
-rw-r--r--fs/ext2/dir.c23
-rw-r--r--fs/ext2/ext2.h5
-rw-r--r--fs/ext2/ialloc.c13
-rw-r--r--fs/ext2/inode.c87
-rw-r--r--fs/ext2/super.c14
-rw-r--r--fs/ext2/xattr.c25
-rw-r--r--fs/ext3/Kconfig1
-rw-r--r--fs/ext3/ialloc.c12
-rw-r--r--fs/ext3/inode.c143
-rw-r--r--fs/ext3/namei.c3
-rw-r--r--fs/ext3/resize.c2
-rw-r--r--fs/ext3/super.c31
-rw-r--r--fs/ext3/xattr.c12
-rw-r--r--fs/ext4/acl.c1
-rw-r--r--fs/ext4/balloc.c6
-rw-r--r--fs/ext4/block_validity.c8
-rw-r--r--fs/ext4/dir.c23
-rw-r--r--fs/ext4/ext4.h155
-rw-r--r--fs/ext4/ext4_jbd2.c71
-rw-r--r--fs/ext4/ext4_jbd2.h56
-rw-r--r--fs/ext4/extents.c20
-rw-r--r--fs/ext4/file.c5
-rw-r--r--fs/ext4/ialloc.c6
-rw-r--r--fs/ext4/inode.c258
-rw-r--r--fs/ext4/mballoc.c155
-rw-r--r--fs/ext4/migrate.c2
-rw-r--r--fs/ext4/move_extent.c10
-rw-r--r--fs/ext4/namei.c40
-rw-r--r--fs/ext4/resize.c8
-rw-r--r--fs/ext4/super.c346
-rw-r--r--fs/ext4/xattr.c15
-rw-r--r--fs/fat/fat.h1
-rw-r--r--fs/fat/file.c49
-rw-r--r--fs/fat/inode.c26
-rw-r--r--fs/file.c3
-rw-r--r--fs/freevxfs/vxfs_extern.h2
-rw-r--r--fs/freevxfs/vxfs_inode.c8
-rw-r--r--fs/freevxfs/vxfs_super.c4
-rw-r--r--fs/fs-writeback.c13
-rw-r--r--fs/fscache/Kconfig1
-rw-r--r--fs/fscache/internal.h8
-rw-r--r--fs/fscache/main.c106
-rw-r--r--fs/fscache/object-list.c11
-rw-r--r--fs/fscache/object.c106
-rw-r--r--fs/fscache/operation.c67
-rw-r--r--fs/fscache/page.c36
-rw-r--r--fs/fuse/dev.c229
-rw-r--r--fs/fuse/dir.c19
-rw-r--r--fs/fuse/file.c2
-rw-r--r--fs/fuse/fuse_i.h3
-rw-r--r--fs/fuse/inode.c6
-rw-r--r--fs/gfs2/Kconfig1
-rw-r--r--fs/gfs2/aops.c19
-rw-r--r--fs/gfs2/bmap.c17
-rw-r--r--fs/gfs2/bmap.h2
-rw-r--r--fs/gfs2/dir.c42
-rw-r--r--fs/gfs2/file.c4
-rw-r--r--fs/gfs2/glock.c105
-rw-r--r--fs/gfs2/incore.h4
-rw-r--r--fs/gfs2/inode.c27
-rw-r--r--fs/gfs2/main.c14
-rw-r--r--fs/gfs2/ops_fstype.c35
-rw-r--r--fs/gfs2/ops_inode.c18
-rw-r--r--fs/gfs2/quota.c25
-rw-r--r--fs/gfs2/recovery.c54
-rw-r--r--fs/gfs2/recovery.h6
-rw-r--r--fs/gfs2/super.c52
-rw-r--r--fs/gfs2/sys.c60
-rw-r--r--fs/gfs2/xattr.c24
-rw-r--r--fs/hfs/hfs_fs.h2
-rw-r--r--fs/hfs/inode.c70
-rw-r--r--fs/hfs/super.c2
-rw-r--r--fs/hfsplus/hfsplus_fs.h1
-rw-r--r--fs/hfsplus/inode.c77
-rw-r--r--fs/hfsplus/super.c10
-rw-r--r--fs/hostfs/hostfs.h22
-rw-r--r--fs/hostfs/hostfs_kern.c517
-rw-r--r--fs/hostfs/hostfs_user.c112
-rw-r--r--fs/hpfs/file.c11
-rw-r--r--fs/hpfs/hpfs_fn.h2
-rw-r--r--fs/hpfs/inode.c24
-rw-r--r--fs/hpfs/super.c2
-rw-r--r--fs/hppfs/hppfs.c8
-rw-r--r--fs/hugetlbfs/inode.c41
-rw-r--r--fs/inode.c177
-rw-r--r--fs/jbd/journal.c7
-rw-r--r--fs/jbd/recovery.c11
-rw-r--r--fs/jbd2/checkpoint.c18
-rw-r--r--fs/jbd2/commit.c50
-rw-r--r--fs/jbd2/journal.c121
-rw-r--r--fs/jbd2/recovery.c10
-rw-r--r--fs/jbd2/transaction.c233
-rw-r--r--fs/jffs2/dir.c16
-rw-r--r--fs/jffs2/fs.c10
-rw-r--r--fs/jffs2/os-linux.h2
-rw-r--r--fs/jffs2/super.c2
-rw-r--r--fs/jffs2/xattr.c2
-rw-r--r--fs/jfs/file.c14
-rw-r--r--fs/jfs/inode.c63
-rw-r--r--fs/jfs/jfs_inode.h2
-rw-r--r--fs/jfs/super.c8
-rw-r--r--fs/libfs.c70
-rw-r--r--fs/logfs/dir.c5
-rw-r--r--fs/logfs/file.c18
-rw-r--r--fs/logfs/inode.c51
-rw-r--r--fs/logfs/journal.c2
-rw-r--r--fs/logfs/logfs.h4
-rw-r--r--fs/logfs/readwrite.c62
-rw-r--r--fs/logfs/segment.c1
-rw-r--r--fs/logfs/super.c23
-rw-r--r--fs/mbcache.c168
-rw-r--r--fs/minix/bitmap.c6
-rw-r--r--fs/minix/dir.c21
-rw-r--r--fs/minix/file.c22
-rw-r--r--fs/minix/inode.c35
-rw-r--r--fs/minix/minix.h4
-rw-r--r--fs/namei.c6
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/ncpfs/inode.c40
-rw-r--r--fs/nfs/Kconfig10
-rw-r--r--fs/nfs/callback_proc.c19
-rw-r--r--fs/nfs/client.c21
-rw-r--r--fs/nfs/delegation.c16
-rw-r--r--fs/nfs/delegation.h4
-rw-r--r--fs/nfs/dir.c11
-rw-r--r--fs/nfs/direct.c29
-rw-r--r--fs/nfs/file.c64
-rw-r--r--fs/nfs/inode.c87
-rw-r--r--fs/nfs/internal.h11
-rw-r--r--fs/nfs/nfs2xdr.c7
-rw-r--r--fs/nfs/nfs3xdr.c8
-rw-r--r--fs/nfs/nfs4_fs.h57
-rw-r--r--fs/nfs/nfs4proc.c474
-rw-r--r--fs/nfs/nfs4renewd.c4
-rw-r--r--fs/nfs/nfs4state.c82
-rw-r--r--fs/nfs/nfs4xdr.c107
-rw-r--r--fs/nfs/nfsroot.c2
-rw-r--r--fs/nfs/pagelist.c8
-rw-r--r--fs/nfs/read.c3
-rw-r--r--fs/nfs/super.c8
-rw-r--r--fs/nfs/unlink.c2
-rw-r--r--fs/nfs/write.c39
-rw-r--r--fs/nfsd/nfs3proc.c8
-rw-r--r--fs/nfsd/nfs4callback.c57
-rw-r--r--fs/nfsd/nfs4state.c381
-rw-r--r--fs/nfsd/nfs4xdr.c9
-rw-r--r--fs/nfsd/nfsctl.c24
-rw-r--r--fs/nfsd/nfsd.h1
-rw-r--r--fs/nfsd/nfsproc.c4
-rw-r--r--fs/nfsd/nfssvc.c151
-rw-r--r--fs/nfsd/state.h40
-rw-r--r--fs/nfsd/vfs.c89
-rw-r--r--fs/nfsd/vfs.h4
-rw-r--r--fs/nilfs2/bmap.c6
-rw-r--r--fs/nilfs2/bmap.h16
-rw-r--r--fs/nilfs2/bmap_union.h42
-rw-r--r--fs/nilfs2/btnode.c23
-rw-r--r--fs/nilfs2/btnode.h4
-rw-r--r--fs/nilfs2/btree.c914
-rw-r--r--fs/nilfs2/btree.h12
-rw-r--r--fs/nilfs2/dir.c58
-rw-r--r--fs/nilfs2/direct.c96
-rw-r--r--fs/nilfs2/direct.h11
-rw-r--r--fs/nilfs2/gcdat.c2
-rw-r--r--fs/nilfs2/gcinode.c17
-rw-r--r--fs/nilfs2/inode.c78
-rw-r--r--fs/nilfs2/mdt.c1
-rw-r--r--fs/nilfs2/nilfs.h24
-rw-r--r--fs/nilfs2/page.c5
-rw-r--r--fs/nilfs2/page.h2
-rw-r--r--fs/nilfs2/recovery.c359
-rw-r--r--fs/nilfs2/segbuf.h24
-rw-r--r--fs/nilfs2/segment.c19
-rw-r--r--fs/nilfs2/segment.h10
-rw-r--r--fs/nilfs2/super.c353
-rw-r--r--fs/nilfs2/the_nilfs.c161
-rw-r--r--fs/nilfs2/the_nilfs.h23
-rw-r--r--fs/notify/inode_mark.c6
-rw-r--r--fs/ntfs/inode.c10
-rw-r--r--fs/ntfs/inode.h2
-rw-r--r--fs/ntfs/super.c2
-rw-r--r--fs/ocfs2/aops.c16
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c6
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c18
-rw-r--r--fs/ocfs2/dlmglue.c4
-rw-r--r--fs/ocfs2/file.c22
-rw-r--r--fs/ocfs2/inode.c29
-rw-r--r--fs/ocfs2/inode.h5
-rw-r--r--fs/ocfs2/journal.c4
-rw-r--r--fs/ocfs2/super.c5
-rw-r--r--fs/omfs/file.c36
-rw-r--r--fs/omfs/inode.c9
-rw-r--r--fs/open.c11
-rw-r--r--fs/partitions/check.c1
-rw-r--r--fs/proc/array.c2
-rw-r--r--fs/proc/base.c120
-rw-r--r--fs/proc/generic.c18
-rw-r--r--fs/proc/inode.c6
-rw-r--r--fs/proc/proc_sysctl.c15
-rw-r--r--fs/qnx4/inode.c11
-rw-r--r--fs/quota/dquot.c81
-rw-r--r--fs/quota/quota_tree.c85
-rw-r--r--fs/quota/quota_tree.h6
-rw-r--r--fs/quota/quota_v1.c3
-rw-r--r--fs/quota/quota_v2.c11
-rw-r--r--fs/ramfs/file-nommu.c7
-rw-r--r--fs/readdir.c8
-rw-r--r--fs/reiserfs/file.c50
-rw-r--r--fs/reiserfs/inode.c136
-rw-r--r--fs/reiserfs/super.c10
-rw-r--r--fs/smbfs/inode.c12
-rw-r--r--fs/statfs.c95
-rw-r--r--fs/super.c51
-rw-r--r--fs/sync.c25
-rw-r--r--fs/sysfs/file.c3
-rw-r--r--fs/sysfs/inode.c8
-rw-r--r--fs/sysfs/mount.c2
-rw-r--r--fs/sysfs/sysfs.h2
-rw-r--r--fs/sysv/dir.c21
-rw-r--r--fs/sysv/file.c22
-rw-r--r--fs/sysv/ialloc.c1
-rw-r--r--fs/sysv/inode.c19
-rw-r--r--fs/sysv/itree.c19
-rw-r--r--fs/sysv/super.c1
-rw-r--r--fs/sysv/sysv.h4
-rw-r--r--fs/ubifs/file.c23
-rw-r--r--fs/ubifs/lpt.c14
-rw-r--r--fs/ubifs/lpt_commit.c2
-rw-r--r--fs/ubifs/recovery.c23
-rw-r--r--fs/ubifs/super.c16
-rw-r--r--fs/ubifs/ubifs.h2
-rw-r--r--fs/udf/file.c23
-rw-r--r--fs/udf/ialloc.c2
-rw-r--r--fs/udf/inode.c61
-rw-r--r--fs/udf/super.c5
-rw-r--r--fs/udf/udfdecl.h3
-rw-r--r--fs/ufs/dir.c13
-rw-r--r--fs/ufs/ialloc.c2
-rw-r--r--fs/ufs/inode.c63
-rw-r--r--fs/ufs/super.c2
-rw-r--r--fs/ufs/truncate.c16
-rw-r--r--fs/ufs/ufs.h2
-rw-r--r--fs/ufs/util.h4
-rw-r--r--fs/xfs/Makefile4
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c645
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.h4
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c62
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h119
-rw-r--r--fs/xfs/linux-2.6/xfs_dmapi_priv.h28
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c8
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c104
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.h25
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c27
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c6
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c30
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h3
-rw-r--r--fs/xfs/linux-2.6/xfs_quotaops.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c177
-rw-r--r--fs/xfs/linux-2.6/xfs_super.h7
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c49
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h3
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h128
-rw-r--r--fs/xfs/quota/xfs_dquot.c114
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c301
-rw-r--r--fs/xfs/quota/xfs_qm.c15
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c10
-rw-r--r--fs/xfs/quota/xfs_qm_stats.c10
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c121
-rw-r--r--fs/xfs/quota/xfs_trans_dquot.c35
-rw-r--r--fs/xfs/support/debug.c1
-rw-r--r--fs/xfs/xfs_alloc.c15
-rw-r--r--fs/xfs/xfs_alloc.h20
-rw-r--r--fs/xfs/xfs_alloc_btree.c5
-rw-r--r--fs/xfs/xfs_attr.c91
-rw-r--r--fs/xfs/xfs_attr_leaf.c5
-rw-r--r--fs/xfs/xfs_bmap.c327
-rw-r--r--fs/xfs/xfs_bmap.h37
-rw-r--r--fs/xfs/xfs_bmap_btree.c5
-rw-r--r--fs/xfs/xfs_btree.c5
-rw-r--r--fs/xfs/xfs_buf_item.c228
-rw-r--r--fs/xfs/xfs_buf_item.h2
-rw-r--r--fs/xfs/xfs_da_btree.c20
-rw-r--r--fs/xfs/xfs_dfrag.c16
-rw-r--r--fs/xfs/xfs_dir2.c11
-rw-r--r--fs/xfs/xfs_dir2_block.c8
-rw-r--r--fs/xfs/xfs_dir2_data.c2
-rw-r--r--fs/xfs/xfs_dir2_leaf.c4
-rw-r--r--fs/xfs/xfs_dir2_node.c2
-rw-r--r--fs/xfs/xfs_dir2_sf.c2
-rw-r--r--fs/xfs/xfs_dmapi.h170
-rw-r--r--fs/xfs/xfs_dmops.c55
-rw-r--r--fs/xfs/xfs_error.c4
-rw-r--r--fs/xfs/xfs_extfree_item.c278
-rw-r--r--fs/xfs/xfs_filestream.c84
-rw-r--r--fs/xfs/xfs_filestream.h82
-rw-r--r--fs/xfs/xfs_fsops.c7
-rw-r--r--fs/xfs/xfs_ialloc.c4
-rw-r--r--fs/xfs/xfs_ialloc_btree.c4
-rw-r--r--fs/xfs/xfs_iget.c108
-rw-r--r--fs/xfs/xfs_inode.c60
-rw-r--r--fs/xfs/xfs_inode.h4
-rw-r--r--fs/xfs/xfs_inode_item.c273
-rw-r--r--fs/xfs/xfs_inode_item.h12
-rw-r--r--fs/xfs/xfs_iomap.c76
-rw-r--r--fs/xfs/xfs_iomap.h22
-rw-r--r--fs/xfs/xfs_itable.c8
-rw-r--r--fs/xfs/xfs_log.c16
-rw-r--r--fs/xfs/xfs_log.h11
-rw-r--r--fs/xfs/xfs_log_cil.c4
-rw-r--r--fs/xfs/xfs_log_recover.c42
-rw-r--r--fs/xfs/xfs_mount.c3
-rw-r--r--fs/xfs/xfs_mount.h69
-rw-r--r--fs/xfs/xfs_rename.c63
-rw-r--r--fs/xfs/xfs_rtalloc.c9
-rw-r--r--fs/xfs/xfs_rw.c15
-rw-r--r--fs/xfs/xfs_trans.c211
-rw-r--r--fs/xfs/xfs_trans.h117
-rw-r--r--fs/xfs/xfs_trans_ail.c1
-rw-r--r--fs/xfs/xfs_trans_buf.c75
-rw-r--r--fs/xfs/xfs_trans_extfree.c23
-rw-r--r--fs/xfs/xfs_trans_inode.c74
-rw-r--r--fs/xfs/xfs_trans_item.c441
-rw-r--r--fs/xfs/xfs_trans_priv.h18
-rw-r--r--fs/xfs/xfs_utils.c87
-rw-r--r--fs/xfs/xfs_utils.h1
-rw-r--r--fs/xfs/xfs_vnodeops.c331
423 files changed, 10732 insertions, 9656 deletions
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
index 1a940ec7af61..91fba025fcbe 100644
--- a/fs/9p/Makefile
+++ b/fs/9p/Makefile
@@ -8,6 +8,8 @@ obj-$(CONFIG_9P_FS) := 9p.o
8 vfs_dir.o \ 8 vfs_dir.o \
9 vfs_dentry.o \ 9 vfs_dentry.o \
10 v9fs.o \ 10 v9fs.o \
11 fid.o 11 fid.o \
12 xattr.o \
13 xattr_user.o
12 14
139p-$(CONFIG_9P_FSCACHE) += cache.o 159p-$(CONFIG_9P_FSCACHE) += cache.o
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 7317b39b2815..358563689064 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -97,6 +97,34 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, u32 uid, int any)
97 return ret; 97 return ret;
98} 98}
99 99
100/*
101 * We need to hold v9ses->rename_sem as long as we hold references
102 * to returned path array. Array element contain pointers to
103 * dentry names.
104 */
105static int build_path_from_dentry(struct v9fs_session_info *v9ses,
106 struct dentry *dentry, char ***names)
107{
108 int n = 0, i;
109 char **wnames;
110 struct dentry *ds;
111
112 for (ds = dentry; !IS_ROOT(ds); ds = ds->d_parent)
113 n++;
114
115 wnames = kmalloc(sizeof(char *) * n, GFP_KERNEL);
116 if (!wnames)
117 goto err_out;
118
119 for (ds = dentry, i = (n-1); i >= 0; i--, ds = ds->d_parent)
120 wnames[i] = (char *)ds->d_name.name;
121
122 *names = wnames;
123 return n;
124err_out:
125 return -ENOMEM;
126}
127
100/** 128/**
101 * v9fs_fid_lookup - lookup for a fid, try to walk if not found 129 * v9fs_fid_lookup - lookup for a fid, try to walk if not found
102 * @dentry: dentry to look for fid in 130 * @dentry: dentry to look for fid in
@@ -112,7 +140,7 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
112 int i, n, l, clone, any, access; 140 int i, n, l, clone, any, access;
113 u32 uid; 141 u32 uid;
114 struct p9_fid *fid, *old_fid = NULL; 142 struct p9_fid *fid, *old_fid = NULL;
115 struct dentry *d, *ds; 143 struct dentry *ds;
116 struct v9fs_session_info *v9ses; 144 struct v9fs_session_info *v9ses;
117 char **wnames, *uname; 145 char **wnames, *uname;
118 146
@@ -139,49 +167,62 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
139 fid = v9fs_fid_find(dentry, uid, any); 167 fid = v9fs_fid_find(dentry, uid, any);
140 if (fid) 168 if (fid)
141 return fid; 169 return fid;
142 170 /*
171 * we don't have a matching fid. To do a TWALK we need
172 * parent fid. We need to prevent rename when we want to
173 * look at the parent.
174 */
175 down_read(&v9ses->rename_sem);
143 ds = dentry->d_parent; 176 ds = dentry->d_parent;
144 fid = v9fs_fid_find(ds, uid, any); 177 fid = v9fs_fid_find(ds, uid, any);
145 if (!fid) { /* walk from the root */ 178 if (fid) {
146 n = 0; 179 /* Found the parent fid do a lookup with that */
147 for (ds = dentry; !IS_ROOT(ds); ds = ds->d_parent) 180 fid = p9_client_walk(fid, 1, (char **)&dentry->d_name.name, 1);
148 n++; 181 goto fid_out;
182 }
183 up_read(&v9ses->rename_sem);
149 184
150 fid = v9fs_fid_find(ds, uid, any); 185 /* start from the root and try to do a lookup */
151 if (!fid) { /* the user is not attached to the fs yet */ 186 fid = v9fs_fid_find(dentry->d_sb->s_root, uid, any);
152 if (access == V9FS_ACCESS_SINGLE) 187 if (!fid) {
153 return ERR_PTR(-EPERM); 188 /* the user is not attached to the fs yet */
189 if (access == V9FS_ACCESS_SINGLE)
190 return ERR_PTR(-EPERM);
154 191
155 if (v9fs_proto_dotu(v9ses)) 192 if (v9fs_proto_dotu(v9ses) || v9fs_proto_dotl(v9ses))
156 uname = NULL; 193 uname = NULL;
157 else 194 else
158 uname = v9ses->uname; 195 uname = v9ses->uname;
159 196
160 fid = p9_client_attach(v9ses->clnt, NULL, uname, uid, 197 fid = p9_client_attach(v9ses->clnt, NULL, uname, uid,
161 v9ses->aname); 198 v9ses->aname);
162 199 if (IS_ERR(fid))
163 if (IS_ERR(fid)) 200 return fid;
164 return fid;
165
166 v9fs_fid_add(ds, fid);
167 }
168 } else /* walk from the parent */
169 n = 1;
170 201
171 if (ds == dentry) 202 v9fs_fid_add(dentry->d_sb->s_root, fid);
203 }
204 /* If we are root ourself just return that */
205 if (dentry->d_sb->s_root == dentry)
172 return fid; 206 return fid;
173 207 /*
174 wnames = kmalloc(sizeof(char *) * n, GFP_KERNEL); 208 * Do a multipath walk with attached root.
175 if (!wnames) 209 * When walking parent we need to make sure we
176 return ERR_PTR(-ENOMEM); 210 * don't have a parallel rename happening
177 211 */
178 for (d = dentry, i = (n-1); i >= 0; i--, d = d->d_parent) 212 down_read(&v9ses->rename_sem);
179 wnames[i] = (char *) d->d_name.name; 213 n = build_path_from_dentry(v9ses, dentry, &wnames);
180 214 if (n < 0) {
215 fid = ERR_PTR(n);
216 goto err_out;
217 }
181 clone = 1; 218 clone = 1;
182 i = 0; 219 i = 0;
183 while (i < n) { 220 while (i < n) {
184 l = min(n - i, P9_MAXWELEM); 221 l = min(n - i, P9_MAXWELEM);
222 /*
223 * We need to hold rename lock when doing a multipath
224 * walk to ensure none of the patch component change
225 */
185 fid = p9_client_walk(fid, l, &wnames[i], clone); 226 fid = p9_client_walk(fid, l, &wnames[i], clone);
186 if (IS_ERR(fid)) { 227 if (IS_ERR(fid)) {
187 if (old_fid) { 228 if (old_fid) {
@@ -193,15 +234,17 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
193 p9_client_clunk(old_fid); 234 p9_client_clunk(old_fid);
194 } 235 }
195 kfree(wnames); 236 kfree(wnames);
196 return fid; 237 goto err_out;
197 } 238 }
198 old_fid = fid; 239 old_fid = fid;
199 i += l; 240 i += l;
200 clone = 0; 241 clone = 0;
201 } 242 }
202
203 kfree(wnames); 243 kfree(wnames);
244fid_out:
204 v9fs_fid_add(dentry, fid); 245 v9fs_fid_add(dentry, fid);
246err_out:
247 up_read(&v9ses->rename_sem);
205 return fid; 248 return fid;
206} 249}
207 250
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index f8b86e92cd66..38dc0e067599 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -237,6 +237,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
237 __putname(v9ses->uname); 237 __putname(v9ses->uname);
238 return ERR_PTR(-ENOMEM); 238 return ERR_PTR(-ENOMEM);
239 } 239 }
240 init_rwsem(&v9ses->rename_sem);
240 241
241 rc = bdi_setup_and_register(&v9ses->bdi, "9p", BDI_CAP_MAP_COPY); 242 rc = bdi_setup_and_register(&v9ses->bdi, "9p", BDI_CAP_MAP_COPY);
242 if (rc) { 243 if (rc) {
@@ -278,7 +279,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
278 v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ; 279 v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ;
279 280
280 /* for legacy mode, fall back to V9FS_ACCESS_ANY */ 281 /* for legacy mode, fall back to V9FS_ACCESS_ANY */
281 if (!v9fs_proto_dotu(v9ses) && 282 if (!(v9fs_proto_dotu(v9ses) || v9fs_proto_dotl(v9ses)) &&
282 ((v9ses->flags&V9FS_ACCESS_MASK) == V9FS_ACCESS_USER)) { 283 ((v9ses->flags&V9FS_ACCESS_MASK) == V9FS_ACCESS_USER)) {
283 284
284 v9ses->flags &= ~V9FS_ACCESS_MASK; 285 v9ses->flags &= ~V9FS_ACCESS_MASK;
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index bec4d0bcb458..4c963c9fc41f 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -104,6 +104,7 @@ struct v9fs_session_info {
104 struct p9_client *clnt; /* 9p client */ 104 struct p9_client *clnt; /* 9p client */
105 struct list_head slist; /* list of sessions registered with v9fs */ 105 struct list_head slist; /* list of sessions registered with v9fs */
106 struct backing_dev_info bdi; 106 struct backing_dev_info bdi;
107 struct rw_semaphore rename_sem;
107}; 108};
108 109
109struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *, 110struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 32ef4009d030..88418c419ea7 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -52,9 +52,10 @@ void v9fs_destroy_inode(struct inode *inode);
52#endif 52#endif
53 53
54struct inode *v9fs_get_inode(struct super_block *sb, int mode); 54struct inode *v9fs_get_inode(struct super_block *sb, int mode);
55void v9fs_clear_inode(struct inode *inode); 55void v9fs_evict_inode(struct inode *inode);
56ino_t v9fs_qid2ino(struct p9_qid *qid); 56ino_t v9fs_qid2ino(struct p9_qid *qid);
57void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *); 57void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
58void v9fs_stat2inode_dotl(struct p9_stat_dotl *, struct inode *);
58int v9fs_dir_release(struct inode *inode, struct file *filp); 59int v9fs_dir_release(struct inode *inode, struct file *filp);
59int v9fs_file_open(struct inode *inode, struct file *file); 60int v9fs_file_open(struct inode *inode, struct file *file);
60void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat); 61void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 36d961f342af..16c8a2a98c1b 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -87,29 +87,19 @@ static void p9stat_init(struct p9_wstat *stbuf)
87} 87}
88 88
89/** 89/**
90 * v9fs_dir_readdir - read a directory 90 * v9fs_alloc_rdir_buf - Allocate buffer used for read and readdir
91 * @filp: opened file structure 91 * @filp: opened file structure
92 * @dirent: directory structure ??? 92 * @buflen: Length in bytes of buffer to allocate
93 * @filldir: function to populate directory structure ???
94 * 93 *
95 */ 94 */
96 95
97static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) 96static int v9fs_alloc_rdir_buf(struct file *filp, int buflen)
98{ 97{
99 int over;
100 struct p9_wstat st;
101 int err = 0;
102 struct p9_fid *fid;
103 int buflen;
104 int reclen = 0;
105 struct p9_rdir *rdir; 98 struct p9_rdir *rdir;
99 struct p9_fid *fid;
100 int err = 0;
106 101
107 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
108 fid = filp->private_data; 102 fid = filp->private_data;
109
110 buflen = fid->clnt->msize - P9_IOHDRSZ;
111
112 /* allocate rdir on demand */
113 if (!fid->rdir) { 103 if (!fid->rdir) {
114 rdir = kmalloc(sizeof(struct p9_rdir) + buflen, GFP_KERNEL); 104 rdir = kmalloc(sizeof(struct p9_rdir) + buflen, GFP_KERNEL);
115 105
@@ -128,6 +118,36 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
128 spin_unlock(&filp->f_dentry->d_lock); 118 spin_unlock(&filp->f_dentry->d_lock);
129 kfree(rdir); 119 kfree(rdir);
130 } 120 }
121exit:
122 return err;
123}
124
125/**
126 * v9fs_dir_readdir - read a directory
127 * @filp: opened file structure
128 * @dirent: directory structure ???
129 * @filldir: function to populate directory structure ???
130 *
131 */
132
133static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
134{
135 int over;
136 struct p9_wstat st;
137 int err = 0;
138 struct p9_fid *fid;
139 int buflen;
140 int reclen = 0;
141 struct p9_rdir *rdir;
142
143 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
144 fid = filp->private_data;
145
146 buflen = fid->clnt->msize - P9_IOHDRSZ;
147
148 err = v9fs_alloc_rdir_buf(filp, buflen);
149 if (err)
150 goto exit;
131 rdir = (struct p9_rdir *) fid->rdir; 151 rdir = (struct p9_rdir *) fid->rdir;
132 152
133 err = mutex_lock_interruptible(&rdir->mutex); 153 err = mutex_lock_interruptible(&rdir->mutex);
@@ -176,6 +196,88 @@ exit:
176 return err; 196 return err;
177} 197}
178 198
199/**
200 * v9fs_dir_readdir_dotl - read a directory
201 * @filp: opened file structure
202 * @dirent: buffer to fill dirent structures
203 * @filldir: function to populate dirent structures
204 *
205 */
206static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
207 filldir_t filldir)
208{
209 int over;
210 int err = 0;
211 struct p9_fid *fid;
212 int buflen;
213 struct p9_rdir *rdir;
214 struct p9_dirent curdirent;
215 u64 oldoffset = 0;
216
217 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
218 fid = filp->private_data;
219
220 buflen = fid->clnt->msize - P9_READDIRHDRSZ;
221
222 err = v9fs_alloc_rdir_buf(filp, buflen);
223 if (err)
224 goto exit;
225 rdir = (struct p9_rdir *) fid->rdir;
226
227 err = mutex_lock_interruptible(&rdir->mutex);
228 if (err)
229 return err;
230
231 while (err == 0) {
232 if (rdir->tail == rdir->head) {
233 err = p9_client_readdir(fid, rdir->buf, buflen,
234 filp->f_pos);
235 if (err <= 0)
236 goto unlock_and_exit;
237
238 rdir->head = 0;
239 rdir->tail = err;
240 }
241
242 while (rdir->head < rdir->tail) {
243
244 err = p9dirent_read(rdir->buf + rdir->head,
245 buflen - rdir->head, &curdirent,
246 fid->clnt->proto_version);
247 if (err < 0) {
248 P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err);
249 err = -EIO;
250 goto unlock_and_exit;
251 }
252
253 /* d_off in dirent structure tracks the offset into
254 * the next dirent in the dir. However, filldir()
255 * expects offset into the current dirent. Hence
256 * while calling filldir send the offset from the
257 * previous dirent structure.
258 */
259 over = filldir(dirent, curdirent.d_name,
260 strlen(curdirent.d_name),
261 oldoffset, v9fs_qid2ino(&curdirent.qid),
262 curdirent.d_type);
263 oldoffset = curdirent.d_off;
264
265 if (over) {
266 err = 0;
267 goto unlock_and_exit;
268 }
269
270 filp->f_pos = curdirent.d_off;
271 rdir->head += err;
272 }
273 }
274
275unlock_and_exit:
276 mutex_unlock(&rdir->mutex);
277exit:
278 return err;
279}
280
179 281
180/** 282/**
181 * v9fs_dir_release - close a directory 283 * v9fs_dir_release - close a directory
@@ -207,7 +309,7 @@ const struct file_operations v9fs_dir_operations = {
207const struct file_operations v9fs_dir_operations_dotl = { 309const struct file_operations v9fs_dir_operations_dotl = {
208 .read = generic_read_dir, 310 .read = generic_read_dir,
209 .llseek = generic_file_llseek, 311 .llseek = generic_file_llseek,
210 .readdir = v9fs_dir_readdir, 312 .readdir = v9fs_dir_readdir_dotl,
211 .open = v9fs_file_open, 313 .open = v9fs_file_open,
212 .release = v9fs_dir_release, 314 .release = v9fs_dir_release,
213}; 315};
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 2bedc6c94fc2..e97c92bd6f16 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -59,9 +59,13 @@ int v9fs_file_open(struct inode *inode, struct file *file)
59 struct p9_fid *fid; 59 struct p9_fid *fid;
60 int omode; 60 int omode;
61 61
62 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p \n", inode, file); 62 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file);
63 v9ses = v9fs_inode2v9ses(inode); 63 v9ses = v9fs_inode2v9ses(inode);
64 omode = v9fs_uflags2omode(file->f_flags, v9fs_proto_dotu(v9ses)); 64 if (v9fs_proto_dotl(v9ses))
65 omode = file->f_flags;
66 else
67 omode = v9fs_uflags2omode(file->f_flags,
68 v9fs_proto_dotu(v9ses));
65 fid = file->private_data; 69 fid = file->private_data;
66 if (!fid) { 70 if (!fid) {
67 fid = v9fs_fid_clone(file->f_path.dentry); 71 fid = v9fs_fid_clone(file->f_path.dentry);
@@ -73,11 +77,12 @@ int v9fs_file_open(struct inode *inode, struct file *file)
73 p9_client_clunk(fid); 77 p9_client_clunk(fid);
74 return err; 78 return err;
75 } 79 }
76 if (omode & P9_OTRUNC) { 80 if (file->f_flags & O_TRUNC) {
77 i_size_write(inode, 0); 81 i_size_write(inode, 0);
78 inode->i_blocks = 0; 82 inode->i_blocks = 0;
79 } 83 }
80 if ((file->f_flags & O_APPEND) && (!v9fs_proto_dotu(v9ses))) 84 if ((file->f_flags & O_APPEND) &&
85 (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)))
81 generic_file_llseek(file, 0, SEEK_END); 86 generic_file_llseek(file, 0, SEEK_END);
82 } 87 }
83 88
@@ -139,7 +144,7 @@ ssize_t
139v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count, 144v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
140 u64 offset) 145 u64 offset)
141{ 146{
142 int n, total; 147 int n, total, size;
143 struct p9_fid *fid = filp->private_data; 148 struct p9_fid *fid = filp->private_data;
144 149
145 P9_DPRINTK(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", fid->fid, 150 P9_DPRINTK(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", fid->fid,
@@ -147,6 +152,7 @@ v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
147 152
148 n = 0; 153 n = 0;
149 total = 0; 154 total = 0;
155 size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ;
150 do { 156 do {
151 n = p9_client_read(fid, data, udata, offset, count); 157 n = p9_client_read(fid, data, udata, offset, count);
152 if (n <= 0) 158 if (n <= 0)
@@ -160,7 +166,7 @@ v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
160 offset += n; 166 offset += n;
161 count -= n; 167 count -= n;
162 total += n; 168 total += n;
163 } while (count > 0 && n == (fid->clnt->msize - P9_IOHDRSZ)); 169 } while (count > 0 && n == size);
164 170
165 if (n < 0) 171 if (n < 0)
166 total = n; 172 total = n;
@@ -183,11 +189,13 @@ v9fs_file_read(struct file *filp, char __user *udata, size_t count,
183{ 189{
184 int ret; 190 int ret;
185 struct p9_fid *fid; 191 struct p9_fid *fid;
192 size_t size;
186 193
187 P9_DPRINTK(P9_DEBUG_VFS, "count %zu offset %lld\n", count, *offset); 194 P9_DPRINTK(P9_DEBUG_VFS, "count %zu offset %lld\n", count, *offset);
188 fid = filp->private_data; 195 fid = filp->private_data;
189 196
190 if (count > (fid->clnt->msize - P9_IOHDRSZ)) 197 size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ;
198 if (count > size)
191 ret = v9fs_file_readn(filp, NULL, udata, count, *offset); 199 ret = v9fs_file_readn(filp, NULL, udata, count, *offset);
192 else 200 else
193 ret = p9_client_read(fid, NULL, udata, *offset, count); 201 ret = p9_client_read(fid, NULL, udata, *offset, count);
@@ -224,9 +232,7 @@ v9fs_file_write(struct file *filp, const char __user * data,
224 fid = filp->private_data; 232 fid = filp->private_data;
225 clnt = fid->clnt; 233 clnt = fid->clnt;
226 234
227 rsize = fid->iounit; 235 rsize = fid->iounit ? fid->iounit : clnt->msize - P9_IOHDRSZ;
228 if (!rsize || rsize > clnt->msize-P9_IOHDRSZ)
229 rsize = clnt->msize - P9_IOHDRSZ;
230 236
231 do { 237 do {
232 if (count < rsize) 238 if (count < rsize)
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 4331b3b5ee1c..d97c34a24f7a 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -35,6 +35,7 @@
35#include <linux/idr.h> 35#include <linux/idr.h>
36#include <linux/sched.h> 36#include <linux/sched.h>
37#include <linux/slab.h> 37#include <linux/slab.h>
38#include <linux/xattr.h>
38#include <net/9p/9p.h> 39#include <net/9p/9p.h>
39#include <net/9p/client.h> 40#include <net/9p/client.h>
40 41
@@ -42,6 +43,7 @@
42#include "v9fs_vfs.h" 43#include "v9fs_vfs.h"
43#include "fid.h" 44#include "fid.h"
44#include "cache.h" 45#include "cache.h"
46#include "xattr.h"
45 47
46static const struct inode_operations v9fs_dir_inode_operations; 48static const struct inode_operations v9fs_dir_inode_operations;
47static const struct inode_operations v9fs_dir_inode_operations_dotu; 49static const struct inode_operations v9fs_dir_inode_operations_dotu;
@@ -236,6 +238,41 @@ void v9fs_destroy_inode(struct inode *inode)
236#endif 238#endif
237 239
238/** 240/**
241 * v9fs_get_fsgid_for_create - Helper function to get the gid for creating a
242 * new file system object. This checks the S_ISGID to determine the owning
243 * group of the new file system object.
244 */
245
246static gid_t v9fs_get_fsgid_for_create(struct inode *dir_inode)
247{
248 BUG_ON(dir_inode == NULL);
249
250 if (dir_inode->i_mode & S_ISGID) {
251 /* set_gid bit is set.*/
252 return dir_inode->i_gid;
253 }
254 return current_fsgid();
255}
256
257/**
258 * v9fs_dentry_from_dir_inode - helper function to get the dentry from
259 * dir inode.
260 *
261 */
262
263static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
264{
265 struct dentry *dentry;
266
267 spin_lock(&dcache_lock);
268 /* A directory inode should have exactly one dentry alias. */
269 BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry));
270 dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias);
271 spin_unlock(&dcache_lock);
272 return dentry;
273}
274
275/**
239 * v9fs_get_inode - helper function to setup an inode 276 * v9fs_get_inode - helper function to setup an inode
240 * @sb: superblock 277 * @sb: superblock
241 * @mode: mode to setup inode with 278 * @mode: mode to setup inode with
@@ -267,7 +304,13 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
267 case S_IFBLK: 304 case S_IFBLK:
268 case S_IFCHR: 305 case S_IFCHR:
269 case S_IFSOCK: 306 case S_IFSOCK:
270 if (!v9fs_proto_dotu(v9ses)) { 307 if (v9fs_proto_dotl(v9ses)) {
308 inode->i_op = &v9fs_file_inode_operations_dotl;
309 inode->i_fop = &v9fs_file_operations_dotl;
310 } else if (v9fs_proto_dotu(v9ses)) {
311 inode->i_op = &v9fs_file_inode_operations;
312 inode->i_fop = &v9fs_file_operations;
313 } else {
271 P9_DPRINTK(P9_DEBUG_ERROR, 314 P9_DPRINTK(P9_DEBUG_ERROR,
272 "special files without extended mode\n"); 315 "special files without extended mode\n");
273 err = -EINVAL; 316 err = -EINVAL;
@@ -387,8 +430,10 @@ error:
387 * @inode: inode to release 430 * @inode: inode to release
388 * 431 *
389 */ 432 */
390void v9fs_clear_inode(struct inode *inode) 433void v9fs_evict_inode(struct inode *inode)
391{ 434{
435 truncate_inode_pages(inode->i_mapping, 0);
436 end_writeback(inode);
392 filemap_fdatawrite(inode->i_mapping); 437 filemap_fdatawrite(inode->i_mapping);
393 438
394#ifdef CONFIG_9P_FSCACHE 439#ifdef CONFIG_9P_FSCACHE
@@ -396,23 +441,14 @@ void v9fs_clear_inode(struct inode *inode)
396#endif 441#endif
397} 442}
398 443
399/**
400 * v9fs_inode_from_fid - populate an inode by issuing a attribute request
401 * @v9ses: session information
402 * @fid: fid to issue attribute request for
403 * @sb: superblock on which to create inode
404 *
405 */
406
407static struct inode * 444static struct inode *
408v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, 445v9fs_inode(struct v9fs_session_info *v9ses, struct p9_fid *fid,
409 struct super_block *sb) 446 struct super_block *sb)
410{ 447{
411 int err, umode; 448 int err, umode;
412 struct inode *ret; 449 struct inode *ret = NULL;
413 struct p9_wstat *st; 450 struct p9_wstat *st;
414 451
415 ret = NULL;
416 st = p9_client_stat(fid); 452 st = p9_client_stat(fid);
417 if (IS_ERR(st)) 453 if (IS_ERR(st))
418 return ERR_CAST(st); 454 return ERR_CAST(st);
@@ -433,15 +469,62 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
433#endif 469#endif
434 p9stat_free(st); 470 p9stat_free(st);
435 kfree(st); 471 kfree(st);
436
437 return ret; 472 return ret;
438
439error: 473error:
440 p9stat_free(st); 474 p9stat_free(st);
441 kfree(st); 475 kfree(st);
442 return ERR_PTR(err); 476 return ERR_PTR(err);
443} 477}
444 478
479static struct inode *
480v9fs_inode_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
481 struct super_block *sb)
482{
483 struct inode *ret = NULL;
484 int err;
485 struct p9_stat_dotl *st;
486
487 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
488 if (IS_ERR(st))
489 return ERR_CAST(st);
490
491 ret = v9fs_get_inode(sb, st->st_mode);
492 if (IS_ERR(ret)) {
493 err = PTR_ERR(ret);
494 goto error;
495 }
496
497 v9fs_stat2inode_dotl(st, ret);
498 ret->i_ino = v9fs_qid2ino(&st->qid);
499#ifdef CONFIG_9P_FSCACHE
500 v9fs_vcookie_set_qid(ret, &st->qid);
501 v9fs_cache_inode_get_cookie(ret);
502#endif
503 kfree(st);
504 return ret;
505error:
506 kfree(st);
507 return ERR_PTR(err);
508}
509
510/**
511 * v9fs_inode_from_fid - Helper routine to populate an inode by
512 * issuing an attribute request
513 * @v9ses: session information
514 * @fid: fid to issue attribute request for
515 * @sb: superblock on which to create inode
516 *
517 */
518static inline struct inode *
519v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
520 struct super_block *sb)
521{
522 if (v9fs_proto_dotl(v9ses))
523 return v9fs_inode_dotl(v9ses, fid, sb);
524 else
525 return v9fs_inode(v9ses, fid, sb);
526}
527
445/** 528/**
446 * v9fs_remove - helper function to remove files and directories 529 * v9fs_remove - helper function to remove files and directories
447 * @dir: directory inode that is being deleted 530 * @dir: directory inode that is being deleted
@@ -563,6 +646,118 @@ error:
563} 646}
564 647
565/** 648/**
649 * v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol.
650 * @dir: inode of the directory in which the file is being created
651 * @dentry: dentry of the file that is being created
652 * @mode: create permissions
653 * @nd: path information
654 *
655 */
656
657static int
658v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int mode,
659 struct nameidata *nd)
660{
661 int err = 0;
662 char *name = NULL;
663 gid_t gid;
664 int flags;
665 struct v9fs_session_info *v9ses;
666 struct p9_fid *fid = NULL;
667 struct p9_fid *dfid, *ofid;
668 struct file *filp;
669 struct p9_qid qid;
670 struct inode *inode;
671
672 v9ses = v9fs_inode2v9ses(dir);
673 if (nd && nd->flags & LOOKUP_OPEN)
674 flags = nd->intent.open.flags - 1;
675 else
676 flags = O_RDWR;
677
678 name = (char *) dentry->d_name.name;
679 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_create_dotl: name:%s flags:0x%x "
680 "mode:0x%x\n", name, flags, mode);
681
682 dfid = v9fs_fid_lookup(dentry->d_parent);
683 if (IS_ERR(dfid)) {
684 err = PTR_ERR(dfid);
685 P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
686 return err;
687 }
688
689 /* clone a fid to use for creation */
690 ofid = p9_client_walk(dfid, 0, NULL, 1);
691 if (IS_ERR(ofid)) {
692 err = PTR_ERR(ofid);
693 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
694 return err;
695 }
696
697 gid = v9fs_get_fsgid_for_create(dir);
698 err = p9_client_create_dotl(ofid, name, flags, mode, gid, &qid);
699 if (err < 0) {
700 P9_DPRINTK(P9_DEBUG_VFS,
701 "p9_client_open_dotl failed in creat %d\n",
702 err);
703 goto error;
704 }
705
706 /* No need to populate the inode if we are not opening the file AND
707 * not in cached mode.
708 */
709 if (!v9ses->cache && !(nd && nd->flags & LOOKUP_OPEN)) {
710 /* Not in cached mode. No need to populate inode with stat */
711 dentry->d_op = &v9fs_dentry_operations;
712 p9_client_clunk(ofid);
713 d_instantiate(dentry, NULL);
714 return 0;
715 }
716
717 /* Now walk from the parent so we can get an unopened fid. */
718 fid = p9_client_walk(dfid, 1, &name, 1);
719 if (IS_ERR(fid)) {
720 err = PTR_ERR(fid);
721 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
722 fid = NULL;
723 goto error;
724 }
725
726 /* instantiate inode and assign the unopened fid to dentry */
727 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
728 if (IS_ERR(inode)) {
729 err = PTR_ERR(inode);
730 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
731 goto error;
732 }
733 dentry->d_op = &v9fs_cached_dentry_operations;
734 d_instantiate(dentry, inode);
735 err = v9fs_fid_add(dentry, fid);
736 if (err < 0)
737 goto error;
738
739 /* if we are opening a file, assign the open fid to the file */
740 if (nd && nd->flags & LOOKUP_OPEN) {
741 filp = lookup_instantiate_filp(nd, dentry, v9fs_open_created);
742 if (IS_ERR(filp)) {
743 p9_client_clunk(ofid);
744 return PTR_ERR(filp);
745 }
746 filp->private_data = ofid;
747 } else
748 p9_client_clunk(ofid);
749
750 return 0;
751
752error:
753 if (ofid)
754 p9_client_clunk(ofid);
755 if (fid)
756 p9_client_clunk(fid);
757 return err;
758}
759
760/**
566 * v9fs_vfs_create - VFS hook to create files 761 * v9fs_vfs_create - VFS hook to create files
567 * @dir: directory inode that is being created 762 * @dir: directory inode that is being created
568 * @dentry: dentry that is being deleted 763 * @dentry: dentry that is being deleted
@@ -652,6 +847,83 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
652 return err; 847 return err;
653} 848}
654 849
850
851/**
852 * v9fs_vfs_mkdir_dotl - VFS mkdir hook to create a directory
853 * @dir: inode of the parent directory
854 * @dentry: dentry of the directory that is being created
855 * @mode: mode for new directory
856 *
857 */
858
859static int v9fs_vfs_mkdir_dotl(struct inode *dir, struct dentry *dentry,
860 int mode)
861{
862 int err;
863 struct v9fs_session_info *v9ses;
864 struct p9_fid *fid = NULL, *dfid = NULL;
865 gid_t gid;
866 char *name;
867 struct inode *inode;
868 struct p9_qid qid;
869 struct dentry *dir_dentry;
870
871 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
872 err = 0;
873 v9ses = v9fs_inode2v9ses(dir);
874
875 mode |= S_IFDIR;
876 dir_dentry = v9fs_dentry_from_dir_inode(dir);
877 dfid = v9fs_fid_lookup(dir_dentry);
878 if (IS_ERR(dfid)) {
879 err = PTR_ERR(dfid);
880 P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
881 dfid = NULL;
882 goto error;
883 }
884
885 gid = v9fs_get_fsgid_for_create(dir);
886 if (gid < 0) {
887 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_fsgid_for_create failed\n");
888 goto error;
889 }
890
891 name = (char *) dentry->d_name.name;
892 err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid);
893 if (err < 0)
894 goto error;
895
896 /* instantiate inode and assign the unopened fid to the dentry */
897 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
898 fid = p9_client_walk(dfid, 1, &name, 1);
899 if (IS_ERR(fid)) {
900 err = PTR_ERR(fid);
901 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
902 err);
903 fid = NULL;
904 goto error;
905 }
906
907 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
908 if (IS_ERR(inode)) {
909 err = PTR_ERR(inode);
910 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
911 err);
912 goto error;
913 }
914 dentry->d_op = &v9fs_cached_dentry_operations;
915 d_instantiate(dentry, inode);
916 err = v9fs_fid_add(dentry, fid);
917 if (err < 0)
918 goto error;
919 fid = NULL;
920 }
921error:
922 if (fid)
923 p9_client_clunk(fid);
924 return err;
925}
926
655/** 927/**
656 * v9fs_vfs_lookup - VFS lookup hook to "walk" to a new inode 928 * v9fs_vfs_lookup - VFS lookup hook to "walk" to a new inode
657 * @dir: inode that is being walked from 929 * @dir: inode that is being walked from
@@ -678,6 +950,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
678 950
679 sb = dir->i_sb; 951 sb = dir->i_sb;
680 v9ses = v9fs_inode2v9ses(dir); 952 v9ses = v9fs_inode2v9ses(dir);
953 /* We can walk d_parent because we hold the dir->i_mutex */
681 dfid = v9fs_fid_lookup(dentry->d_parent); 954 dfid = v9fs_fid_lookup(dentry->d_parent);
682 if (IS_ERR(dfid)) 955 if (IS_ERR(dfid))
683 return ERR_CAST(dfid); 956 return ERR_CAST(dfid);
@@ -785,27 +1058,33 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
785 goto clunk_olddir; 1058 goto clunk_olddir;
786 } 1059 }
787 1060
1061 down_write(&v9ses->rename_sem);
788 if (v9fs_proto_dotl(v9ses)) { 1062 if (v9fs_proto_dotl(v9ses)) {
789 retval = p9_client_rename(oldfid, newdirfid, 1063 retval = p9_client_rename(oldfid, newdirfid,
790 (char *) new_dentry->d_name.name); 1064 (char *) new_dentry->d_name.name);
791 if (retval != -ENOSYS) 1065 if (retval != -ENOSYS)
792 goto clunk_newdir; 1066 goto clunk_newdir;
793 } 1067 }
1068 if (old_dentry->d_parent != new_dentry->d_parent) {
1069 /*
1070 * 9P .u can only handle file rename in the same directory
1071 */
794 1072
795 /* 9P can only handle file rename in the same directory */
796 if (memcmp(&olddirfid->qid, &newdirfid->qid, sizeof(newdirfid->qid))) {
797 P9_DPRINTK(P9_DEBUG_ERROR, 1073 P9_DPRINTK(P9_DEBUG_ERROR,
798 "old dir and new dir are different\n"); 1074 "old dir and new dir are different\n");
799 retval = -EXDEV; 1075 retval = -EXDEV;
800 goto clunk_newdir; 1076 goto clunk_newdir;
801 } 1077 }
802
803 v9fs_blank_wstat(&wstat); 1078 v9fs_blank_wstat(&wstat);
804 wstat.muid = v9ses->uname; 1079 wstat.muid = v9ses->uname;
805 wstat.name = (char *) new_dentry->d_name.name; 1080 wstat.name = (char *) new_dentry->d_name.name;
806 retval = p9_client_wstat(oldfid, &wstat); 1081 retval = p9_client_wstat(oldfid, &wstat);
807 1082
808clunk_newdir: 1083clunk_newdir:
1084 if (!retval)
1085 /* successful rename */
1086 d_move(old_dentry, new_dentry);
1087 up_write(&v9ses->rename_sem);
809 p9_client_clunk(newdirfid); 1088 p9_client_clunk(newdirfid);
810 1089
811clunk_olddir: 1090clunk_olddir:
@@ -853,6 +1132,42 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
853 return 0; 1132 return 0;
854} 1133}
855 1134
1135static int
1136v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
1137 struct kstat *stat)
1138{
1139 int err;
1140 struct v9fs_session_info *v9ses;
1141 struct p9_fid *fid;
1142 struct p9_stat_dotl *st;
1143
1144 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
1145 err = -EPERM;
1146 v9ses = v9fs_inode2v9ses(dentry->d_inode);
1147 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
1148 return simple_getattr(mnt, dentry, stat);
1149
1150 fid = v9fs_fid_lookup(dentry);
1151 if (IS_ERR(fid))
1152 return PTR_ERR(fid);
1153
1154 /* Ask for all the fields in stat structure. Server will return
1155 * whatever it supports
1156 */
1157
1158 st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
1159 if (IS_ERR(st))
1160 return PTR_ERR(st);
1161
1162 v9fs_stat2inode_dotl(st, dentry->d_inode);
1163 generic_fillattr(dentry->d_inode, stat);
1164 /* Change block size to what the server returned */
1165 stat->blksize = st->st_blksize;
1166
1167 kfree(st);
1168 return 0;
1169}
1170
856/** 1171/**
857 * v9fs_vfs_setattr - set file metadata 1172 * v9fs_vfs_setattr - set file metadata
858 * @dentry: file whose metadata to set 1173 * @dentry: file whose metadata to set
@@ -896,6 +1211,58 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
896 } 1211 }
897 1212
898 retval = p9_client_wstat(fid, &wstat); 1213 retval = p9_client_wstat(fid, &wstat);
1214 if (retval < 0)
1215 return retval;
1216
1217 if ((iattr->ia_valid & ATTR_SIZE) &&
1218 iattr->ia_size != i_size_read(dentry->d_inode)) {
1219 retval = vmtruncate(dentry->d_inode, iattr->ia_size);
1220 if (retval)
1221 return retval;
1222 }
1223
1224 setattr_copy(dentry->d_inode, iattr);
1225 mark_inode_dirty(dentry->d_inode);
1226 return 0;
1227}
1228
1229/**
1230 * v9fs_vfs_setattr_dotl - set file metadata
1231 * @dentry: file whose metadata to set
1232 * @iattr: metadata assignment structure
1233 *
1234 */
1235
1236static int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
1237{
1238 int retval;
1239 struct v9fs_session_info *v9ses;
1240 struct p9_fid *fid;
1241 struct p9_iattr_dotl p9attr;
1242
1243 P9_DPRINTK(P9_DEBUG_VFS, "\n");
1244
1245 retval = inode_change_ok(dentry->d_inode, iattr);
1246 if (retval)
1247 return retval;
1248
1249 p9attr.valid = iattr->ia_valid;
1250 p9attr.mode = iattr->ia_mode;
1251 p9attr.uid = iattr->ia_uid;
1252 p9attr.gid = iattr->ia_gid;
1253 p9attr.size = iattr->ia_size;
1254 p9attr.atime_sec = iattr->ia_atime.tv_sec;
1255 p9attr.atime_nsec = iattr->ia_atime.tv_nsec;
1256 p9attr.mtime_sec = iattr->ia_mtime.tv_sec;
1257 p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec;
1258
1259 retval = -EPERM;
1260 v9ses = v9fs_inode2v9ses(dentry->d_inode);
1261 fid = v9fs_fid_lookup(dentry);
1262 if (IS_ERR(fid))
1263 return PTR_ERR(fid);
1264
1265 retval = p9_client_setattr(fid, &p9attr);
899 if (retval >= 0) 1266 if (retval >= 0)
900 retval = inode_setattr(dentry->d_inode, iattr); 1267 retval = inode_setattr(dentry->d_inode, iattr);
901 1268
@@ -980,6 +1347,77 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
980} 1347}
981 1348
982/** 1349/**
1350 * v9fs_stat2inode_dotl - populate an inode structure with stat info
1351 * @stat: stat structure
1352 * @inode: inode to populate
1353 * @sb: superblock of filesystem
1354 *
1355 */
1356
1357void
1358v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
1359{
1360
1361 if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
1362 inode->i_atime.tv_sec = stat->st_atime_sec;
1363 inode->i_atime.tv_nsec = stat->st_atime_nsec;
1364 inode->i_mtime.tv_sec = stat->st_mtime_sec;
1365 inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
1366 inode->i_ctime.tv_sec = stat->st_ctime_sec;
1367 inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
1368 inode->i_uid = stat->st_uid;
1369 inode->i_gid = stat->st_gid;
1370 inode->i_nlink = stat->st_nlink;
1371 inode->i_mode = stat->st_mode;
1372 inode->i_rdev = new_decode_dev(stat->st_rdev);
1373
1374 if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode)))
1375 init_special_inode(inode, inode->i_mode, inode->i_rdev);
1376
1377 i_size_write(inode, stat->st_size);
1378 inode->i_blocks = stat->st_blocks;
1379 } else {
1380 if (stat->st_result_mask & P9_STATS_ATIME) {
1381 inode->i_atime.tv_sec = stat->st_atime_sec;
1382 inode->i_atime.tv_nsec = stat->st_atime_nsec;
1383 }
1384 if (stat->st_result_mask & P9_STATS_MTIME) {
1385 inode->i_mtime.tv_sec = stat->st_mtime_sec;
1386 inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
1387 }
1388 if (stat->st_result_mask & P9_STATS_CTIME) {
1389 inode->i_ctime.tv_sec = stat->st_ctime_sec;
1390 inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
1391 }
1392 if (stat->st_result_mask & P9_STATS_UID)
1393 inode->i_uid = stat->st_uid;
1394 if (stat->st_result_mask & P9_STATS_GID)
1395 inode->i_gid = stat->st_gid;
1396 if (stat->st_result_mask & P9_STATS_NLINK)
1397 inode->i_nlink = stat->st_nlink;
1398 if (stat->st_result_mask & P9_STATS_MODE) {
1399 inode->i_mode = stat->st_mode;
1400 if ((S_ISBLK(inode->i_mode)) ||
1401 (S_ISCHR(inode->i_mode)))
1402 init_special_inode(inode, inode->i_mode,
1403 inode->i_rdev);
1404 }
1405 if (stat->st_result_mask & P9_STATS_RDEV)
1406 inode->i_rdev = new_decode_dev(stat->st_rdev);
1407 if (stat->st_result_mask & P9_STATS_SIZE)
1408 i_size_write(inode, stat->st_size);
1409 if (stat->st_result_mask & P9_STATS_BLOCKS)
1410 inode->i_blocks = stat->st_blocks;
1411 }
1412 if (stat->st_result_mask & P9_STATS_GEN)
1413 inode->i_generation = stat->st_gen;
1414
1415 /* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION
1416 * because the inode structure does not have fields for them.
1417 */
1418}
1419
1420/**
983 * v9fs_qid2ino - convert qid into inode number 1421 * v9fs_qid2ino - convert qid into inode number
984 * @qid: qid to hash 1422 * @qid: qid to hash
985 * 1423 *
@@ -1022,7 +1460,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
1022 if (IS_ERR(fid)) 1460 if (IS_ERR(fid))
1023 return PTR_ERR(fid); 1461 return PTR_ERR(fid);
1024 1462
1025 if (!v9fs_proto_dotu(v9ses)) 1463 if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses))
1026 return -EBADF; 1464 return -EBADF;
1027 1465
1028 st = p9_client_stat(fid); 1466 st = p9_client_stat(fid);
@@ -1128,6 +1566,99 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
1128} 1566}
1129 1567
1130/** 1568/**
1569 * v9fs_vfs_symlink_dotl - helper function to create symlinks
1570 * @dir: directory inode containing symlink
1571 * @dentry: dentry for symlink
1572 * @symname: symlink data
1573 *
1574 * See Also: 9P2000.L RFC for more information
1575 *
1576 */
1577
1578static int
1579v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
1580 const char *symname)
1581{
1582 struct v9fs_session_info *v9ses;
1583 struct p9_fid *dfid;
1584 struct p9_fid *fid = NULL;
1585 struct inode *inode;
1586 struct p9_qid qid;
1587 char *name;
1588 int err;
1589 gid_t gid;
1590
1591 name = (char *) dentry->d_name.name;
1592 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n",
1593 dir->i_ino, name, symname);
1594 v9ses = v9fs_inode2v9ses(dir);
1595
1596 dfid = v9fs_fid_lookup(dentry->d_parent);
1597 if (IS_ERR(dfid)) {
1598 err = PTR_ERR(dfid);
1599 P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
1600 return err;
1601 }
1602
1603 gid = v9fs_get_fsgid_for_create(dir);
1604
1605 if (gid < 0) {
1606 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_egid failed %d\n", gid);
1607 goto error;
1608 }
1609
1610 /* Server doesn't alter fid on TSYMLINK. Hence no need to clone it. */
1611 err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid);
1612
1613 if (err < 0) {
1614 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_symlink failed %d\n", err);
1615 goto error;
1616 }
1617
1618 if (v9ses->cache) {
1619 /* Now walk from the parent so we can get an unopened fid. */
1620 fid = p9_client_walk(dfid, 1, &name, 1);
1621 if (IS_ERR(fid)) {
1622 err = PTR_ERR(fid);
1623 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
1624 err);
1625 fid = NULL;
1626 goto error;
1627 }
1628
1629 /* instantiate inode and assign the unopened fid to dentry */
1630 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
1631 if (IS_ERR(inode)) {
1632 err = PTR_ERR(inode);
1633 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
1634 err);
1635 goto error;
1636 }
1637 dentry->d_op = &v9fs_cached_dentry_operations;
1638 d_instantiate(dentry, inode);
1639 err = v9fs_fid_add(dentry, fid);
1640 if (err < 0)
1641 goto error;
1642 fid = NULL;
1643 } else {
1644 /* Not in cached mode. No need to populate inode with stat */
1645 inode = v9fs_get_inode(dir->i_sb, S_IFLNK);
1646 if (IS_ERR(inode)) {
1647 err = PTR_ERR(inode);
1648 goto error;
1649 }
1650 dentry->d_op = &v9fs_dentry_operations;
1651 d_instantiate(dentry, inode);
1652 }
1653
1654error:
1655 if (fid)
1656 p9_client_clunk(fid);
1657
1658 return err;
1659}
1660
1661/**
1131 * v9fs_vfs_symlink - helper function to create symlinks 1662 * v9fs_vfs_symlink - helper function to create symlinks
1132 * @dir: directory inode containing symlink 1663 * @dir: directory inode containing symlink
1133 * @dentry: dentry for symlink 1664 * @dentry: dentry for symlink
@@ -1186,6 +1717,76 @@ clunk_fid:
1186} 1717}
1187 1718
1188/** 1719/**
1720 * v9fs_vfs_link_dotl - create a hardlink for dotl
1721 * @old_dentry: dentry for file to link to
1722 * @dir: inode destination for new link
1723 * @dentry: dentry for link
1724 *
1725 */
1726
1727static int
1728v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
1729 struct dentry *dentry)
1730{
1731 int err;
1732 struct p9_fid *dfid, *oldfid;
1733 char *name;
1734 struct v9fs_session_info *v9ses;
1735 struct dentry *dir_dentry;
1736
1737 P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n",
1738 dir->i_ino, old_dentry->d_name.name,
1739 dentry->d_name.name);
1740
1741 v9ses = v9fs_inode2v9ses(dir);
1742 dir_dentry = v9fs_dentry_from_dir_inode(dir);
1743 dfid = v9fs_fid_lookup(dir_dentry);
1744 if (IS_ERR(dfid))
1745 return PTR_ERR(dfid);
1746
1747 oldfid = v9fs_fid_lookup(old_dentry);
1748 if (IS_ERR(oldfid))
1749 return PTR_ERR(oldfid);
1750
1751 name = (char *) dentry->d_name.name;
1752
1753 err = p9_client_link(dfid, oldfid, (char *)dentry->d_name.name);
1754
1755 if (err < 0) {
1756 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_link failed %d\n", err);
1757 return err;
1758 }
1759
1760 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
1761 /* Get the latest stat info from server. */
1762 struct p9_fid *fid;
1763 struct p9_stat_dotl *st;
1764
1765 fid = v9fs_fid_lookup(old_dentry);
1766 if (IS_ERR(fid))
1767 return PTR_ERR(fid);
1768
1769 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
1770 if (IS_ERR(st))
1771 return PTR_ERR(st);
1772
1773 v9fs_stat2inode_dotl(st, old_dentry->d_inode);
1774
1775 kfree(st);
1776 } else {
1777 /* Caching disabled. No need to get up-to-date stat info.
1778 * This dentry will be released immediately. So, just i_count++
1779 */
1780 atomic_inc(&old_dentry->d_inode->i_count);
1781 }
1782
1783 dentry->d_op = old_dentry->d_op;
1784 d_instantiate(dentry, old_dentry->d_inode);
1785
1786 return err;
1787}
1788
1789/**
1189 * v9fs_vfs_mknod - create a special file 1790 * v9fs_vfs_mknod - create a special file
1190 * @dir: inode destination for new link 1791 * @dir: inode destination for new link
1191 * @dentry: dentry for file 1792 * @dentry: dentry for file
@@ -1230,6 +1831,100 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
1230 return retval; 1831 return retval;
1231} 1832}
1232 1833
1834/**
1835 * v9fs_vfs_mknod_dotl - create a special file
1836 * @dir: inode of the directory in which the special file is created
1837 * @dentry: dentry for file
1838 * @mode: mode for creation
1839 * @rdev: device associated with special file
1840 *
1841 */
1842static int
1843v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int mode,
1844 dev_t rdev)
1845{
1846 int err;
1847 char *name;
1848 struct v9fs_session_info *v9ses;
1849 struct p9_fid *fid = NULL, *dfid = NULL;
1850 struct inode *inode;
1851 gid_t gid;
1852 struct p9_qid qid;
1853 struct dentry *dir_dentry;
1854
1855 P9_DPRINTK(P9_DEBUG_VFS,
1856 " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
1857 dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev));
1858
1859 if (!new_valid_dev(rdev))
1860 return -EINVAL;
1861
1862 v9ses = v9fs_inode2v9ses(dir);
1863 dir_dentry = v9fs_dentry_from_dir_inode(dir);
1864 dfid = v9fs_fid_lookup(dir_dentry);
1865 if (IS_ERR(dfid)) {
1866 err = PTR_ERR(dfid);
1867 P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
1868 dfid = NULL;
1869 goto error;
1870 }
1871
1872 gid = v9fs_get_fsgid_for_create(dir);
1873 if (gid < 0) {
1874 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_fsgid_for_create failed\n");
1875 goto error;
1876 }
1877
1878 name = (char *) dentry->d_name.name;
1879
1880 err = p9_client_mknod_dotl(dfid, name, mode, rdev, gid, &qid);
1881 if (err < 0)
1882 goto error;
1883
1884 /* instantiate inode and assign the unopened fid to the dentry */
1885 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
1886 fid = p9_client_walk(dfid, 1, &name, 1);
1887 if (IS_ERR(fid)) {
1888 err = PTR_ERR(fid);
1889 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
1890 err);
1891 fid = NULL;
1892 goto error;
1893 }
1894
1895 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
1896 if (IS_ERR(inode)) {
1897 err = PTR_ERR(inode);
1898 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
1899 err);
1900 goto error;
1901 }
1902 dentry->d_op = &v9fs_cached_dentry_operations;
1903 d_instantiate(dentry, inode);
1904 err = v9fs_fid_add(dentry, fid);
1905 if (err < 0)
1906 goto error;
1907 fid = NULL;
1908 } else {
1909 /*
1910 * Not in cached mode. No need to populate inode with stat.
1911 * socket syscall returns a fd, so we need to instantiate the dentry
1912 */
1913 inode = v9fs_get_inode(dir->i_sb, mode);
1914 if (IS_ERR(inode)) {
1915 err = PTR_ERR(inode);
1916 goto error;
1917 }
1918 dentry->d_op = &v9fs_dentry_operations;
1919 d_instantiate(dentry, inode);
1920 }
1921
1922error:
1923 if (fid)
1924 p9_client_clunk(fid);
1925 return err;
1926}
1927
1233static const struct inode_operations v9fs_dir_inode_operations_dotu = { 1928static const struct inode_operations v9fs_dir_inode_operations_dotu = {
1234 .create = v9fs_vfs_create, 1929 .create = v9fs_vfs_create,
1235 .lookup = v9fs_vfs_lookup, 1930 .lookup = v9fs_vfs_lookup,
@@ -1238,24 +1933,29 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = {
1238 .unlink = v9fs_vfs_unlink, 1933 .unlink = v9fs_vfs_unlink,
1239 .mkdir = v9fs_vfs_mkdir, 1934 .mkdir = v9fs_vfs_mkdir,
1240 .rmdir = v9fs_vfs_rmdir, 1935 .rmdir = v9fs_vfs_rmdir,
1241 .mknod = v9fs_vfs_mknod, 1936 .mknod = v9fs_vfs_mknod_dotl,
1242 .rename = v9fs_vfs_rename, 1937 .rename = v9fs_vfs_rename,
1243 .getattr = v9fs_vfs_getattr, 1938 .getattr = v9fs_vfs_getattr,
1244 .setattr = v9fs_vfs_setattr, 1939 .setattr = v9fs_vfs_setattr,
1245}; 1940};
1246 1941
1247static const struct inode_operations v9fs_dir_inode_operations_dotl = { 1942static const struct inode_operations v9fs_dir_inode_operations_dotl = {
1248 .create = v9fs_vfs_create, 1943 .create = v9fs_vfs_create_dotl,
1249 .lookup = v9fs_vfs_lookup, 1944 .lookup = v9fs_vfs_lookup,
1250 .symlink = v9fs_vfs_symlink, 1945 .link = v9fs_vfs_link_dotl,
1251 .link = v9fs_vfs_link, 1946 .symlink = v9fs_vfs_symlink_dotl,
1252 .unlink = v9fs_vfs_unlink, 1947 .unlink = v9fs_vfs_unlink,
1253 .mkdir = v9fs_vfs_mkdir, 1948 .mkdir = v9fs_vfs_mkdir_dotl,
1254 .rmdir = v9fs_vfs_rmdir, 1949 .rmdir = v9fs_vfs_rmdir,
1255 .mknod = v9fs_vfs_mknod, 1950 .mknod = v9fs_vfs_mknod_dotl,
1256 .rename = v9fs_vfs_rename, 1951 .rename = v9fs_vfs_rename,
1257 .getattr = v9fs_vfs_getattr, 1952 .getattr = v9fs_vfs_getattr_dotl,
1258 .setattr = v9fs_vfs_setattr, 1953 .setattr = v9fs_vfs_setattr_dotl,
1954 .setxattr = generic_setxattr,
1955 .getxattr = generic_getxattr,
1956 .removexattr = generic_removexattr,
1957 .listxattr = v9fs_listxattr,
1958
1259}; 1959};
1260 1960
1261static const struct inode_operations v9fs_dir_inode_operations = { 1961static const struct inode_operations v9fs_dir_inode_operations = {
@@ -1276,8 +1976,12 @@ static const struct inode_operations v9fs_file_inode_operations = {
1276}; 1976};
1277 1977
1278static const struct inode_operations v9fs_file_inode_operations_dotl = { 1978static const struct inode_operations v9fs_file_inode_operations_dotl = {
1279 .getattr = v9fs_vfs_getattr, 1979 .getattr = v9fs_vfs_getattr_dotl,
1280 .setattr = v9fs_vfs_setattr, 1980 .setattr = v9fs_vfs_setattr_dotl,
1981 .setxattr = generic_setxattr,
1982 .getxattr = generic_getxattr,
1983 .removexattr = generic_removexattr,
1984 .listxattr = v9fs_listxattr,
1281}; 1985};
1282 1986
1283static const struct inode_operations v9fs_symlink_inode_operations = { 1987static const struct inode_operations v9fs_symlink_inode_operations = {
@@ -1292,6 +1996,10 @@ static const struct inode_operations v9fs_symlink_inode_operations_dotl = {
1292 .readlink = generic_readlink, 1996 .readlink = generic_readlink,
1293 .follow_link = v9fs_vfs_follow_link, 1997 .follow_link = v9fs_vfs_follow_link,
1294 .put_link = v9fs_vfs_put_link, 1998 .put_link = v9fs_vfs_put_link,
1295 .getattr = v9fs_vfs_getattr, 1999 .getattr = v9fs_vfs_getattr_dotl,
1296 .setattr = v9fs_vfs_setattr, 2000 .setattr = v9fs_vfs_setattr_dotl,
2001 .setxattr = generic_setxattr,
2002 .getxattr = generic_getxattr,
2003 .removexattr = generic_removexattr,
2004 .listxattr = v9fs_listxattr,
1297}; 2005};
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index be74d020436e..f9311077de68 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -45,6 +45,7 @@
45#include "v9fs.h" 45#include "v9fs.h"
46#include "v9fs_vfs.h" 46#include "v9fs_vfs.h"
47#include "fid.h" 47#include "fid.h"
48#include "xattr.h"
48 49
49static const struct super_operations v9fs_super_ops, v9fs_super_ops_dotl; 50static const struct super_operations v9fs_super_ops, v9fs_super_ops_dotl;
50 51
@@ -77,9 +78,10 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
77 sb->s_blocksize_bits = fls(v9ses->maxdata - 1); 78 sb->s_blocksize_bits = fls(v9ses->maxdata - 1);
78 sb->s_blocksize = 1 << sb->s_blocksize_bits; 79 sb->s_blocksize = 1 << sb->s_blocksize_bits;
79 sb->s_magic = V9FS_MAGIC; 80 sb->s_magic = V9FS_MAGIC;
80 if (v9fs_proto_dotl(v9ses)) 81 if (v9fs_proto_dotl(v9ses)) {
81 sb->s_op = &v9fs_super_ops_dotl; 82 sb->s_op = &v9fs_super_ops_dotl;
82 else 83 sb->s_xattr = v9fs_xattr_handlers;
84 } else
83 sb->s_op = &v9fs_super_ops; 85 sb->s_op = &v9fs_super_ops;
84 sb->s_bdi = &v9ses->bdi; 86 sb->s_bdi = &v9ses->bdi;
85 87
@@ -107,7 +109,6 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
107 struct inode *inode = NULL; 109 struct inode *inode = NULL;
108 struct dentry *root = NULL; 110 struct dentry *root = NULL;
109 struct v9fs_session_info *v9ses = NULL; 111 struct v9fs_session_info *v9ses = NULL;
110 struct p9_wstat *st = NULL;
111 int mode = S_IRWXUGO | S_ISVTX; 112 int mode = S_IRWXUGO | S_ISVTX;
112 struct p9_fid *fid; 113 struct p9_fid *fid;
113 int retval = 0; 114 int retval = 0;
@@ -124,16 +125,10 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
124 goto close_session; 125 goto close_session;
125 } 126 }
126 127
127 st = p9_client_stat(fid);
128 if (IS_ERR(st)) {
129 retval = PTR_ERR(st);
130 goto clunk_fid;
131 }
132
133 sb = sget(fs_type, NULL, v9fs_set_super, v9ses); 128 sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
134 if (IS_ERR(sb)) { 129 if (IS_ERR(sb)) {
135 retval = PTR_ERR(sb); 130 retval = PTR_ERR(sb);
136 goto free_stat; 131 goto clunk_fid;
137 } 132 }
138 v9fs_fill_super(sb, v9ses, flags, data); 133 v9fs_fill_super(sb, v9ses, flags, data);
139 134
@@ -151,22 +146,38 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
151 } 146 }
152 147
153 sb->s_root = root; 148 sb->s_root = root;
154 root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
155 149
156 v9fs_stat2inode(st, root->d_inode, sb); 150 if (v9fs_proto_dotl(v9ses)) {
151 struct p9_stat_dotl *st = NULL;
152 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
153 if (IS_ERR(st)) {
154 retval = PTR_ERR(st);
155 goto clunk_fid;
156 }
157
158 v9fs_stat2inode_dotl(st, root->d_inode);
159 kfree(st);
160 } else {
161 struct p9_wstat *st = NULL;
162 st = p9_client_stat(fid);
163 if (IS_ERR(st)) {
164 retval = PTR_ERR(st);
165 goto clunk_fid;
166 }
167
168 root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
169 v9fs_stat2inode(st, root->d_inode, sb);
170
171 p9stat_free(st);
172 kfree(st);
173 }
157 174
158 v9fs_fid_add(root, fid); 175 v9fs_fid_add(root, fid);
159 p9stat_free(st);
160 kfree(st);
161 176
162P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); 177P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
163 simple_set_mnt(mnt, sb); 178 simple_set_mnt(mnt, sb);
164 return 0; 179 return 0;
165 180
166free_stat:
167 p9stat_free(st);
168 kfree(st);
169
170clunk_fid: 181clunk_fid:
171 p9_client_clunk(fid); 182 p9_client_clunk(fid);
172 183
@@ -176,8 +187,6 @@ close_session:
176 return retval; 187 return retval;
177 188
178release_sb: 189release_sb:
179 p9stat_free(st);
180 kfree(st);
181 deactivate_locked_super(sb); 190 deactivate_locked_super(sb);
182 return retval; 191 return retval;
183} 192}
@@ -257,7 +266,7 @@ static const struct super_operations v9fs_super_ops = {
257 .destroy_inode = v9fs_destroy_inode, 266 .destroy_inode = v9fs_destroy_inode,
258#endif 267#endif
259 .statfs = simple_statfs, 268 .statfs = simple_statfs,
260 .clear_inode = v9fs_clear_inode, 269 .evict_inode = v9fs_evict_inode,
261 .show_options = generic_show_options, 270 .show_options = generic_show_options,
262 .umount_begin = v9fs_umount_begin, 271 .umount_begin = v9fs_umount_begin,
263}; 272};
@@ -268,7 +277,7 @@ static const struct super_operations v9fs_super_ops_dotl = {
268 .destroy_inode = v9fs_destroy_inode, 277 .destroy_inode = v9fs_destroy_inode,
269#endif 278#endif
270 .statfs = v9fs_statfs, 279 .statfs = v9fs_statfs,
271 .clear_inode = v9fs_clear_inode, 280 .evict_inode = v9fs_evict_inode,
272 .show_options = generic_show_options, 281 .show_options = generic_show_options,
273 .umount_begin = v9fs_umount_begin, 282 .umount_begin = v9fs_umount_begin,
274}; 283};
@@ -278,4 +287,5 @@ struct file_system_type v9fs_fs_type = {
278 .get_sb = v9fs_get_sb, 287 .get_sb = v9fs_get_sb,
279 .kill_sb = v9fs_kill_super, 288 .kill_sb = v9fs_kill_super,
280 .owner = THIS_MODULE, 289 .owner = THIS_MODULE,
290 .fs_flags = FS_RENAME_DOES_D_MOVE,
281}; 291};
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
new file mode 100644
index 000000000000..f88e5c2dc873
--- /dev/null
+++ b/fs/9p/xattr.c
@@ -0,0 +1,160 @@
1/*
2 * Copyright IBM Corporation, 2010
3 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of version 2.1 of the GNU Lesser General Public License
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 *
13 */
14
15#include <linux/module.h>
16#include <linux/fs.h>
17#include <linux/sched.h>
18#include <net/9p/9p.h>
19#include <net/9p/client.h>
20
21#include "fid.h"
22#include "xattr.h"
23
24/*
25 * v9fs_xattr_get()
26 *
27 * Copy an extended attribute into the buffer
28 * provided, or compute the buffer size required.
29 * Buffer is NULL to compute the size of the buffer required.
30 *
31 * Returns a negative error number on failure, or the number of bytes
32 * used / required on success.
33 */
34ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name,
35 void *buffer, size_t buffer_size)
36{
37 ssize_t retval;
38 int msize, read_count;
39 u64 offset = 0, attr_size;
40 struct p9_fid *fid, *attr_fid;
41
42 P9_DPRINTK(P9_DEBUG_VFS, "%s: name = %s value_len = %zu\n",
43 __func__, name, buffer_size);
44
45 fid = v9fs_fid_lookup(dentry);
46 if (IS_ERR(fid))
47 return PTR_ERR(fid);
48
49 attr_fid = p9_client_xattrwalk(fid, name, &attr_size);
50 if (IS_ERR(attr_fid)) {
51 retval = PTR_ERR(attr_fid);
52 P9_DPRINTK(P9_DEBUG_VFS,
53 "p9_client_attrwalk failed %zd\n", retval);
54 attr_fid = NULL;
55 goto error;
56 }
57 if (!buffer_size) {
58 /* request to get the attr_size */
59 retval = attr_size;
60 goto error;
61 }
62 if (attr_size > buffer_size) {
63 retval = -ERANGE;
64 goto error;
65 }
66 msize = attr_fid->clnt->msize;
67 while (attr_size) {
68 if (attr_size > (msize - P9_IOHDRSZ))
69 read_count = msize - P9_IOHDRSZ;
70 else
71 read_count = attr_size;
72 read_count = p9_client_read(attr_fid, ((char *)buffer)+offset,
73 NULL, offset, read_count);
74 if (read_count < 0) {
75 /* error in xattr read */
76 retval = read_count;
77 goto error;
78 }
79 offset += read_count;
80 attr_size -= read_count;
81 }
82 /* Total read xattr bytes */
83 retval = offset;
84error:
85 if (attr_fid)
86 p9_client_clunk(attr_fid);
87 return retval;
88
89}
90
91/*
92 * v9fs_xattr_set()
93 *
94 * Create, replace or remove an extended attribute for this inode. Buffer
95 * is NULL to remove an existing extended attribute, and non-NULL to
96 * either replace an existing extended attribute, or create a new extended
97 * attribute. The flags XATTR_REPLACE and XATTR_CREATE
98 * specify that an extended attribute must exist and must not exist
99 * previous to the call, respectively.
100 *
101 * Returns 0, or a negative error number on failure.
102 */
103int v9fs_xattr_set(struct dentry *dentry, const char *name,
104 const void *value, size_t value_len, int flags)
105{
106 u64 offset = 0;
107 int retval, msize, write_count;
108 struct p9_fid *fid = NULL;
109
110 P9_DPRINTK(P9_DEBUG_VFS, "%s: name = %s value_len = %zu flags = %d\n",
111 __func__, name, value_len, flags);
112
113 fid = v9fs_fid_clone(dentry);
114 if (IS_ERR(fid)) {
115 retval = PTR_ERR(fid);
116 fid = NULL;
117 goto error;
118 }
119 /*
120 * On success fid points to xattr
121 */
122 retval = p9_client_xattrcreate(fid, name, value_len, flags);
123 if (retval < 0) {
124 P9_DPRINTK(P9_DEBUG_VFS,
125 "p9_client_xattrcreate failed %d\n", retval);
126 goto error;
127 }
128 msize = fid->clnt->msize;;
129 while (value_len) {
130 if (value_len > (msize - P9_IOHDRSZ))
131 write_count = msize - P9_IOHDRSZ;
132 else
133 write_count = value_len;
134 write_count = p9_client_write(fid, ((char *)value)+offset,
135 NULL, offset, write_count);
136 if (write_count < 0) {
137 /* error in xattr write */
138 retval = write_count;
139 goto error;
140 }
141 offset += write_count;
142 value_len -= write_count;
143 }
144 /* Total read xattr bytes */
145 retval = offset;
146error:
147 if (fid)
148 retval = p9_client_clunk(fid);
149 return retval;
150}
151
152ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
153{
154 return v9fs_xattr_get(dentry, NULL, buffer, buffer_size);
155}
156
157const struct xattr_handler *v9fs_xattr_handlers[] = {
158 &v9fs_xattr_user_handler,
159 NULL
160};
diff --git a/fs/9p/xattr.h b/fs/9p/xattr.h
new file mode 100644
index 000000000000..9ddf672ae5c4
--- /dev/null
+++ b/fs/9p/xattr.h
@@ -0,0 +1,27 @@
1/*
2 * Copyright IBM Corporation, 2010
3 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of version 2.1 of the GNU Lesser General Public License
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 *
13 */
14#ifndef FS_9P_XATTR_H
15#define FS_9P_XATTR_H
16
17#include <linux/xattr.h>
18
19extern const struct xattr_handler *v9fs_xattr_handlers[];
20extern struct xattr_handler v9fs_xattr_user_handler;
21
22extern ssize_t v9fs_xattr_get(struct dentry *, const char *,
23 void *, size_t);
24extern int v9fs_xattr_set(struct dentry *, const char *,
25 const void *, size_t, int);
26extern ssize_t v9fs_listxattr(struct dentry *, char *, size_t);
27#endif /* FS_9P_XATTR_H */
diff --git a/fs/9p/xattr_user.c b/fs/9p/xattr_user.c
new file mode 100644
index 000000000000..d0b701b72080
--- /dev/null
+++ b/fs/9p/xattr_user.c
@@ -0,0 +1,80 @@
1/*
2 * Copyright IBM Corporation, 2010
3 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of version 2.1 of the GNU Lesser General Public License
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 *
13 */
14
15
16#include <linux/module.h>
17#include <linux/string.h>
18#include <linux/fs.h>
19#include <linux/slab.h>
20#include "xattr.h"
21
22static int v9fs_xattr_user_get(struct dentry *dentry, const char *name,
23 void *buffer, size_t size, int type)
24{
25 int retval;
26 char *full_name;
27 size_t name_len;
28 size_t prefix_len = XATTR_USER_PREFIX_LEN;
29
30 if (name == NULL)
31 return -EINVAL;
32
33 if (strcmp(name, "") == 0)
34 return -EINVAL;
35
36 name_len = strlen(name);
37 full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
38 if (!full_name)
39 return -ENOMEM;
40 memcpy(full_name, XATTR_USER_PREFIX, prefix_len);
41 memcpy(full_name+prefix_len, name, name_len);
42 full_name[prefix_len + name_len] = '\0';
43
44 retval = v9fs_xattr_get(dentry, full_name, buffer, size);
45 kfree(full_name);
46 return retval;
47}
48
49static int v9fs_xattr_user_set(struct dentry *dentry, const char *name,
50 const void *value, size_t size, int flags, int type)
51{
52 int retval;
53 char *full_name;
54 size_t name_len;
55 size_t prefix_len = XATTR_USER_PREFIX_LEN;
56
57 if (name == NULL)
58 return -EINVAL;
59
60 if (strcmp(name, "") == 0)
61 return -EINVAL;
62
63 name_len = strlen(name);
64 full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
65 if (!full_name)
66 return -ENOMEM;
67 memcpy(full_name, XATTR_USER_PREFIX, prefix_len);
68 memcpy(full_name + prefix_len, name, name_len);
69 full_name[prefix_len + name_len] = '\0';
70
71 retval = v9fs_xattr_set(dentry, full_name, value, size, flags);
72 kfree(full_name);
73 return retval;
74}
75
76struct xattr_handler v9fs_xattr_user_handler = {
77 .prefix = XATTR_USER_PREFIX,
78 .get = v9fs_xattr_user_get,
79 .set = v9fs_xattr_user_set,
80};
diff --git a/fs/Kconfig b/fs/Kconfig
index 5f85b5947613..3d185308ec88 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -64,7 +64,7 @@ source "fs/autofs4/Kconfig"
64source "fs/fuse/Kconfig" 64source "fs/fuse/Kconfig"
65 65
66config CUSE 66config CUSE
67 tristate "Character device in Userpace support" 67 tristate "Character device in Userspace support"
68 depends on FUSE_FS 68 depends on FUSE_FS
69 help 69 help
70 This FUSE extension allows character devices to be 70 This FUSE extension allows character devices to be
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 6f850b06ab62..65794b8fe79e 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -50,10 +50,19 @@ static int adfs_write_begin(struct file *file, struct address_space *mapping,
50 loff_t pos, unsigned len, unsigned flags, 50 loff_t pos, unsigned len, unsigned flags,
51 struct page **pagep, void **fsdata) 51 struct page **pagep, void **fsdata)
52{ 52{
53 int ret;
54
53 *pagep = NULL; 55 *pagep = NULL;
54 return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 56 ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
55 adfs_get_block, 57 adfs_get_block,
56 &ADFS_I(mapping->host)->mmu_private); 58 &ADFS_I(mapping->host)->mmu_private);
59 if (unlikely(ret)) {
60 loff_t isize = mapping->host->i_size;
61 if (pos + len > isize)
62 vmtruncate(mapping->host, isize);
63 }
64
65 return ret;
57} 66}
58 67
59static sector_t _adfs_bmap(struct address_space *mapping, sector_t block) 68static sector_t _adfs_bmap(struct address_space *mapping, sector_t block)
@@ -324,10 +333,7 @@ adfs_notify_change(struct dentry *dentry, struct iattr *attr)
324 333
325 /* XXX: this is missing some actual on-disk truncation.. */ 334 /* XXX: this is missing some actual on-disk truncation.. */
326 if (ia_valid & ATTR_SIZE) 335 if (ia_valid & ATTR_SIZE)
327 error = simple_setsize(inode, attr->ia_size); 336 truncate_setsize(inode, attr->ia_size);
328
329 if (error)
330 goto out;
331 337
332 if (ia_valid & ATTR_MTIME) { 338 if (ia_valid & ATTR_MTIME) {
333 inode->i_mtime = attr->ia_mtime; 339 inode->i_mtime = attr->ia_mtime;
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index f05b6155ccc8..a8cbdeb34025 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -171,8 +171,7 @@ extern int affs_rename(struct inode *old_dir, struct dentry *old_dentry,
171extern unsigned long affs_parent_ino(struct inode *dir); 171extern unsigned long affs_parent_ino(struct inode *dir);
172extern struct inode *affs_new_inode(struct inode *dir); 172extern struct inode *affs_new_inode(struct inode *dir);
173extern int affs_notify_change(struct dentry *dentry, struct iattr *attr); 173extern int affs_notify_change(struct dentry *dentry, struct iattr *attr);
174extern void affs_delete_inode(struct inode *inode); 174extern void affs_evict_inode(struct inode *inode);
175extern void affs_clear_inode(struct inode *inode);
176extern struct inode *affs_iget(struct super_block *sb, 175extern struct inode *affs_iget(struct super_block *sb,
177 unsigned long ino); 176 unsigned long ino);
178extern int affs_write_inode(struct inode *inode, 177extern int affs_write_inode(struct inode *inode,
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 322710c3eedf..c4a9875bd1a6 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -406,10 +406,19 @@ static int affs_write_begin(struct file *file, struct address_space *mapping,
406 loff_t pos, unsigned len, unsigned flags, 406 loff_t pos, unsigned len, unsigned flags,
407 struct page **pagep, void **fsdata) 407 struct page **pagep, void **fsdata)
408{ 408{
409 int ret;
410
409 *pagep = NULL; 411 *pagep = NULL;
410 return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 412 ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
411 affs_get_block, 413 affs_get_block,
412 &AFFS_I(mapping->host)->mmu_private); 414 &AFFS_I(mapping->host)->mmu_private);
415 if (unlikely(ret)) {
416 loff_t isize = mapping->host->i_size;
417 if (pos + len > isize)
418 vmtruncate(mapping->host, isize);
419 }
420
421 return ret;
413} 422}
414 423
415static sector_t _affs_bmap(struct address_space *mapping, sector_t block) 424static sector_t _affs_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index f4b2a4ee4f91..3a0fdec175ba 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -235,31 +235,36 @@ affs_notify_change(struct dentry *dentry, struct iattr *attr)
235 goto out; 235 goto out;
236 } 236 }
237 237
238 error = inode_setattr(inode, attr); 238 if ((attr->ia_valid & ATTR_SIZE) &&
239 if (!error && (attr->ia_valid & ATTR_MODE)) 239 attr->ia_size != i_size_read(inode)) {
240 error = vmtruncate(inode, attr->ia_size);
241 if (error)
242 return error;
243 }
244
245 setattr_copy(inode, attr);
246 mark_inode_dirty(inode);
247
248 if (attr->ia_valid & ATTR_MODE)
240 mode_to_prot(inode); 249 mode_to_prot(inode);
241out: 250out:
242 return error; 251 return error;
243} 252}
244 253
245void 254void
246affs_delete_inode(struct inode *inode) 255affs_evict_inode(struct inode *inode)
247{
248 pr_debug("AFFS: delete_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
249 truncate_inode_pages(&inode->i_data, 0);
250 inode->i_size = 0;
251 affs_truncate(inode);
252 clear_inode(inode);
253 affs_free_block(inode->i_sb, inode->i_ino);
254}
255
256void
257affs_clear_inode(struct inode *inode)
258{ 256{
259 unsigned long cache_page; 257 unsigned long cache_page;
258 pr_debug("AFFS: evict_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
259 truncate_inode_pages(&inode->i_data, 0);
260 260
261 pr_debug("AFFS: clear_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); 261 if (!inode->i_nlink) {
262 inode->i_size = 0;
263 affs_truncate(inode);
264 }
262 265
266 invalidate_inode_buffers(inode);
267 end_writeback(inode);
263 affs_free_prealloc(inode); 268 affs_free_prealloc(inode);
264 cache_page = (unsigned long)AFFS_I(inode)->i_lc; 269 cache_page = (unsigned long)AFFS_I(inode)->i_lc;
265 if (cache_page) { 270 if (cache_page) {
@@ -271,6 +276,9 @@ affs_clear_inode(struct inode *inode)
271 affs_brelse(AFFS_I(inode)->i_ext_bh); 276 affs_brelse(AFFS_I(inode)->i_ext_bh);
272 AFFS_I(inode)->i_ext_last = ~1; 277 AFFS_I(inode)->i_ext_last = ~1;
273 AFFS_I(inode)->i_ext_bh = NULL; 278 AFFS_I(inode)->i_ext_bh = NULL;
279
280 if (!inode->i_nlink)
281 affs_free_block(inode->i_sb, inode->i_ino);
274} 282}
275 283
276struct inode * 284struct inode *
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 16a3e4765f68..33c4e7eef470 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -26,7 +26,7 @@ static int affs_statfs(struct dentry *dentry, struct kstatfs *buf);
26static int affs_remount (struct super_block *sb, int *flags, char *data); 26static int affs_remount (struct super_block *sb, int *flags, char *data);
27 27
28static void 28static void
29affs_commit_super(struct super_block *sb, int clean) 29affs_commit_super(struct super_block *sb, int wait, int clean)
30{ 30{
31 struct affs_sb_info *sbi = AFFS_SB(sb); 31 struct affs_sb_info *sbi = AFFS_SB(sb);
32 struct buffer_head *bh = sbi->s_root_bh; 32 struct buffer_head *bh = sbi->s_root_bh;
@@ -36,6 +36,8 @@ affs_commit_super(struct super_block *sb, int clean)
36 secs_to_datestamp(get_seconds(), &tail->disk_change); 36 secs_to_datestamp(get_seconds(), &tail->disk_change);
37 affs_fix_checksum(sb, bh); 37 affs_fix_checksum(sb, bh);
38 mark_buffer_dirty(bh); 38 mark_buffer_dirty(bh);
39 if (wait)
40 sync_dirty_buffer(bh);
39} 41}
40 42
41static void 43static void
@@ -46,8 +48,8 @@ affs_put_super(struct super_block *sb)
46 48
47 lock_kernel(); 49 lock_kernel();
48 50
49 if (!(sb->s_flags & MS_RDONLY)) 51 if (!(sb->s_flags & MS_RDONLY) && sb->s_dirt)
50 affs_commit_super(sb, 1); 52 affs_commit_super(sb, 1, 1);
51 53
52 kfree(sbi->s_prefix); 54 kfree(sbi->s_prefix);
53 affs_free_bitmap(sb); 55 affs_free_bitmap(sb);
@@ -61,27 +63,20 @@ affs_put_super(struct super_block *sb)
61static void 63static void
62affs_write_super(struct super_block *sb) 64affs_write_super(struct super_block *sb)
63{ 65{
64 int clean = 2;
65
66 lock_super(sb); 66 lock_super(sb);
67 if (!(sb->s_flags & MS_RDONLY)) { 67 if (!(sb->s_flags & MS_RDONLY))
68 // if (sbi->s_bitmap[i].bm_bh) { 68 affs_commit_super(sb, 1, 2);
69 // if (buffer_dirty(sbi->s_bitmap[i].bm_bh)) { 69 sb->s_dirt = 0;
70 // clean = 0;
71 affs_commit_super(sb, clean);
72 sb->s_dirt = !clean; /* redo until bitmap synced */
73 } else
74 sb->s_dirt = 0;
75 unlock_super(sb); 70 unlock_super(sb);
76 71
77 pr_debug("AFFS: write_super() at %lu, clean=%d\n", get_seconds(), clean); 72 pr_debug("AFFS: write_super() at %lu, clean=2\n", get_seconds());
78} 73}
79 74
80static int 75static int
81affs_sync_fs(struct super_block *sb, int wait) 76affs_sync_fs(struct super_block *sb, int wait)
82{ 77{
83 lock_super(sb); 78 lock_super(sb);
84 affs_commit_super(sb, 2); 79 affs_commit_super(sb, wait, 2);
85 sb->s_dirt = 0; 80 sb->s_dirt = 0;
86 unlock_super(sb); 81 unlock_super(sb);
87 return 0; 82 return 0;
@@ -140,8 +135,7 @@ static const struct super_operations affs_sops = {
140 .alloc_inode = affs_alloc_inode, 135 .alloc_inode = affs_alloc_inode,
141 .destroy_inode = affs_destroy_inode, 136 .destroy_inode = affs_destroy_inode,
142 .write_inode = affs_write_inode, 137 .write_inode = affs_write_inode,
143 .delete_inode = affs_delete_inode, 138 .evict_inode = affs_evict_inode,
144 .clear_inode = affs_clear_inode,
145 .put_super = affs_put_super, 139 .put_super = affs_put_super,
146 .write_super = affs_write_super, 140 .write_super = affs_write_super,
147 .sync_fs = affs_sync_fs, 141 .sync_fs = affs_sync_fs,
@@ -554,9 +548,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
554 return 0; 548 return 0;
555 } 549 }
556 if (*flags & MS_RDONLY) { 550 if (*flags & MS_RDONLY) {
557 sb->s_dirt = 1; 551 affs_write_super(sb);
558 while (sb->s_dirt)
559 affs_write_super(sb);
560 affs_free_bitmap(sb); 552 affs_free_bitmap(sb);
561 } else 553 } else
562 res = affs_init_bitmap(sb, flags); 554 res = affs_init_bitmap(sb, flags);
diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig
index 5c4e61d3c772..8f975f25b486 100644
--- a/fs/afs/Kconfig
+++ b/fs/afs/Kconfig
@@ -2,6 +2,7 @@ config AFS_FS
2 tristate "Andrew File System support (AFS) (EXPERIMENTAL)" 2 tristate "Andrew File System support (AFS) (EXPERIMENTAL)"
3 depends on INET && EXPERIMENTAL 3 depends on INET && EXPERIMENTAL
4 select AF_RXRPC 4 select AF_RXRPC
5 select DNS_RESOLVER
5 help 6 help
6 If you say Y here, you will get an experimental Andrew File System 7 If you say Y here, you will get an experimental Andrew File System
7 driver. It currently only supports unsecured read-only AFS access. 8 driver. It currently only supports unsecured read-only AFS access.
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index e19c13f059ed..ffea35c63879 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -13,6 +13,7 @@
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/key.h> 14#include <linux/key.h>
15#include <linux/ctype.h> 15#include <linux/ctype.h>
16#include <linux/dns_resolver.h>
16#include <linux/sched.h> 17#include <linux/sched.h>
17#include <keys/rxrpc-type.h> 18#include <keys/rxrpc-type.h>
18#include "internal.h" 19#include "internal.h"
@@ -36,6 +37,8 @@ static struct afs_cell *afs_cell_alloc(const char *name, char *vllist)
36 struct key *key; 37 struct key *key;
37 size_t namelen; 38 size_t namelen;
38 char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp, *next; 39 char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp, *next;
40 char *dvllist = NULL, *_vllist = NULL;
41 char delimiter = ':';
39 int ret; 42 int ret;
40 43
41 _enter("%s,%s", name, vllist); 44 _enter("%s,%s", name, vllist);
@@ -43,8 +46,10 @@ static struct afs_cell *afs_cell_alloc(const char *name, char *vllist)
43 BUG_ON(!name); /* TODO: want to look up "this cell" in the cache */ 46 BUG_ON(!name); /* TODO: want to look up "this cell" in the cache */
44 47
45 namelen = strlen(name); 48 namelen = strlen(name);
46 if (namelen > AFS_MAXCELLNAME) 49 if (namelen > AFS_MAXCELLNAME) {
50 _leave(" = -ENAMETOOLONG");
47 return ERR_PTR(-ENAMETOOLONG); 51 return ERR_PTR(-ENAMETOOLONG);
52 }
48 53
49 /* allocate and initialise a cell record */ 54 /* allocate and initialise a cell record */
50 cell = kzalloc(sizeof(struct afs_cell) + namelen + 1, GFP_KERNEL); 55 cell = kzalloc(sizeof(struct afs_cell) + namelen + 1, GFP_KERNEL);
@@ -64,15 +69,31 @@ static struct afs_cell *afs_cell_alloc(const char *name, char *vllist)
64 INIT_LIST_HEAD(&cell->vl_list); 69 INIT_LIST_HEAD(&cell->vl_list);
65 spin_lock_init(&cell->vl_lock); 70 spin_lock_init(&cell->vl_lock);
66 71
72 /* if the ip address is invalid, try dns query */
73 if (!vllist || strlen(vllist) < 7) {
74 ret = dns_query("afsdb", name, namelen, "ipv4", &dvllist, NULL);
75 if (ret < 0) {
76 _leave(" = %d", ret);
77 return ERR_PTR(ret);
78 }
79 _vllist = dvllist;
80
81 /* change the delimiter for user-space reply */
82 delimiter = ',';
83
84 } else {
85 _vllist = vllist;
86 }
87
67 /* fill in the VL server list from the rest of the string */ 88 /* fill in the VL server list from the rest of the string */
68 do { 89 do {
69 unsigned a, b, c, d; 90 unsigned a, b, c, d;
70 91
71 next = strchr(vllist, ':'); 92 next = strchr(_vllist, delimiter);
72 if (next) 93 if (next)
73 *next++ = 0; 94 *next++ = 0;
74 95
75 if (sscanf(vllist, "%u.%u.%u.%u", &a, &b, &c, &d) != 4) 96 if (sscanf(_vllist, "%u.%u.%u.%u", &a, &b, &c, &d) != 4)
76 goto bad_address; 97 goto bad_address;
77 98
78 if (a > 255 || b > 255 || c > 255 || d > 255) 99 if (a > 255 || b > 255 || c > 255 || d > 255)
@@ -81,7 +102,7 @@ static struct afs_cell *afs_cell_alloc(const char *name, char *vllist)
81 cell->vl_addrs[cell->vl_naddrs++].s_addr = 102 cell->vl_addrs[cell->vl_naddrs++].s_addr =
82 htonl((a << 24) | (b << 16) | (c << 8) | d); 103 htonl((a << 24) | (b << 16) | (c << 8) | d);
83 104
84 } while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS && (vllist = next)); 105 } while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS && (_vllist = next));
85 106
86 /* create a key to represent an anonymous user */ 107 /* create a key to represent an anonymous user */
87 memcpy(keyname, "afs@", 4); 108 memcpy(keyname, "afs@", 4);
@@ -110,6 +131,7 @@ bad_address:
110 ret = -EINVAL; 131 ret = -EINVAL;
111error: 132error:
112 key_put(cell->anonymous_key); 133 key_put(cell->anonymous_key);
134 kfree(dvllist);
113 kfree(cell); 135 kfree(cell);
114 _leave(" = %d", ret); 136 _leave(" = %d", ret);
115 return ERR_PTR(ret); 137 return ERR_PTR(ret);
@@ -201,14 +223,12 @@ int afs_cell_init(char *rootcell)
201 } 223 }
202 224
203 cp = strchr(rootcell, ':'); 225 cp = strchr(rootcell, ':');
204 if (!cp) { 226 if (!cp)
205 printk(KERN_ERR "kAFS: no VL server IP addresses specified\n"); 227 _debug("kAFS: no VL server IP addresses specified");
206 _leave(" = -EINVAL"); 228 else
207 return -EINVAL; 229 *cp++ = 0;
208 }
209 230
210 /* allocate a cell record for the root cell */ 231 /* allocate a cell record for the root cell */
211 *cp++ = 0;
212 new_root = afs_cell_create(rootcell, cp); 232 new_root = afs_cell_create(rootcell, cp);
213 if (IS_ERR(new_root)) { 233 if (IS_ERR(new_root)) {
214 _leave(" = %ld", PTR_ERR(new_root)); 234 _leave(" = %ld", PTR_ERR(new_root));
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index d00b312e3110..320ffef11574 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -316,7 +316,7 @@ int afs_getattr(struct vfsmount *mnt, struct dentry *dentry,
316/* 316/*
317 * clear an AFS inode 317 * clear an AFS inode
318 */ 318 */
319void afs_clear_inode(struct inode *inode) 319void afs_evict_inode(struct inode *inode)
320{ 320{
321 struct afs_permits *permits; 321 struct afs_permits *permits;
322 struct afs_vnode *vnode; 322 struct afs_vnode *vnode;
@@ -335,6 +335,9 @@ void afs_clear_inode(struct inode *inode)
335 335
336 ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode); 336 ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);
337 337
338 truncate_inode_pages(&inode->i_data, 0);
339 end_writeback(inode);
340
338 afs_give_up_callback(vnode); 341 afs_give_up_callback(vnode);
339 342
340 if (vnode->server) { 343 if (vnode->server) {
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 5f679b77ce24..8679089ce9a1 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -565,7 +565,7 @@ extern void afs_zap_data(struct afs_vnode *);
565extern int afs_validate(struct afs_vnode *, struct key *); 565extern int afs_validate(struct afs_vnode *, struct key *);
566extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *); 566extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
567extern int afs_setattr(struct dentry *, struct iattr *); 567extern int afs_setattr(struct dentry *, struct iattr *);
568extern void afs_clear_inode(struct inode *); 568extern void afs_evict_inode(struct inode *);
569 569
570/* 570/*
571 * main.c 571 * main.c
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 66d54d348c55..cfd1cbe25b22 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -111,6 +111,8 @@ static int __init afs_init(void)
111 111
112 /* initialise the callback update process */ 112 /* initialise the callback update process */
113 ret = afs_callback_update_init(); 113 ret = afs_callback_update_init();
114 if (ret < 0)
115 goto error_callback_update_init;
114 116
115 /* create the RxRPC transport */ 117 /* create the RxRPC transport */
116 ret = afs_open_socket(); 118 ret = afs_open_socket();
@@ -127,15 +129,16 @@ static int __init afs_init(void)
127error_fs: 129error_fs:
128 afs_close_socket(); 130 afs_close_socket();
129error_open_socket: 131error_open_socket:
132 afs_callback_update_kill();
133error_callback_update_init:
134 afs_vlocation_purge();
130error_vl_update_init: 135error_vl_update_init:
136 afs_cell_purge();
131error_cell_init: 137error_cell_init:
132#ifdef CONFIG_AFS_FSCACHE 138#ifdef CONFIG_AFS_FSCACHE
133 fscache_unregister_netfs(&afs_cache_netfs); 139 fscache_unregister_netfs(&afs_cache_netfs);
134error_cache: 140error_cache:
135#endif 141#endif
136 afs_callback_update_kill();
137 afs_vlocation_purge();
138 afs_cell_purge();
139 afs_proc_cleanup(); 142 afs_proc_cleanup();
140 rcu_barrier(); 143 rcu_barrier();
141 printk(KERN_ERR "kAFS: failed to register: %d\n", ret); 144 printk(KERN_ERR "kAFS: failed to register: %d\n", ret);
diff --git a/fs/afs/super.c b/fs/afs/super.c
index e932e5a3a0c1..9cf80f02da16 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -49,7 +49,7 @@ static const struct super_operations afs_super_ops = {
49 .statfs = afs_statfs, 49 .statfs = afs_statfs,
50 .alloc_inode = afs_alloc_inode, 50 .alloc_inode = afs_alloc_inode,
51 .destroy_inode = afs_destroy_inode, 51 .destroy_inode = afs_destroy_inode,
52 .clear_inode = afs_clear_inode, 52 .evict_inode = afs_evict_inode,
53 .put_super = afs_put_super, 53 .put_super = afs_put_super,
54 .show_options = generic_show_options, 54 .show_options = generic_show_options,
55}; 55};
diff --git a/fs/aio.c b/fs/aio.c
index 1ccf25cef1f0..3006b5bc33d6 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1277,7 +1277,7 @@ out:
1277/* sys_io_destroy: 1277/* sys_io_destroy:
1278 * Destroy the aio_context specified. May cancel any outstanding 1278 * Destroy the aio_context specified. May cancel any outstanding
1279 * AIOs and block on completion. Will fail with -ENOSYS if not 1279 * AIOs and block on completion. Will fail with -ENOSYS if not
1280 * implemented. May fail with -EFAULT if the context pointed to 1280 * implemented. May fail with -EINVAL if the context pointed to
1281 * is invalid. 1281 * is invalid.
1282 */ 1282 */
1283SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) 1283SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
@@ -1795,15 +1795,16 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
1795 1795
1796/* io_getevents: 1796/* io_getevents:
1797 * Attempts to read at least min_nr events and up to nr events from 1797 * Attempts to read at least min_nr events and up to nr events from
1798 * the completion queue for the aio_context specified by ctx_id. May 1798 * the completion queue for the aio_context specified by ctx_id. If
1799 * fail with -EINVAL if ctx_id is invalid, if min_nr is out of range, 1799 * it succeeds, the number of read events is returned. May fail with
1800 * if nr is out of range, if when is out of range. May fail with 1800 * -EINVAL if ctx_id is invalid, if min_nr is out of range, if nr is
1801 * -EFAULT if any of the memory specified to is invalid. May return 1801 * out of range, if timeout is out of range. May fail with -EFAULT
1802 * 0 or < min_nr if no events are available and the timeout specified 1802 * if any of the memory specified is invalid. May return 0 or
1803 * by when has elapsed, where when == NULL specifies an infinite 1803 * < min_nr if the timeout specified by timeout has elapsed
1804 * timeout. Note that the timeout pointed to by when is relative and 1804 * before sufficient events are available, where timeout == NULL
1805 * will be updated if not NULL and the operation blocks. Will fail 1805 * specifies an infinite timeout. Note that the timeout pointed to by
1806 * with -ENOSYS if not implemented. 1806 * timeout is relative and will be updated if not NULL and the
1807 * operation blocks. Will fail with -ENOSYS if not implemented.
1807 */ 1808 */
1808SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, 1809SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
1809 long, min_nr, 1810 long, min_nr,
diff --git a/fs/attr.c b/fs/attr.c
index b4fa3b0aa596..7ca41811afa1 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -14,35 +14,53 @@
14#include <linux/fcntl.h> 14#include <linux/fcntl.h>
15#include <linux/security.h> 15#include <linux/security.h>
16 16
17/* Taken over from the old code... */ 17/**
18 18 * inode_change_ok - check if attribute changes to an inode are allowed
19/* POSIX UID/GID verification for setting inode attributes. */ 19 * @inode: inode to check
20 * @attr: attributes to change
21 *
22 * Check if we are allowed to change the attributes contained in @attr
23 * in the given inode. This includes the normal unix access permission
24 * checks, as well as checks for rlimits and others.
25 *
26 * Should be called as the first thing in ->setattr implementations,
27 * possibly after taking additional locks.
28 */
20int inode_change_ok(const struct inode *inode, struct iattr *attr) 29int inode_change_ok(const struct inode *inode, struct iattr *attr)
21{ 30{
22 int retval = -EPERM;
23 unsigned int ia_valid = attr->ia_valid; 31 unsigned int ia_valid = attr->ia_valid;
24 32
33 /*
34 * First check size constraints. These can't be overriden using
35 * ATTR_FORCE.
36 */
37 if (ia_valid & ATTR_SIZE) {
38 int error = inode_newsize_ok(inode, attr->ia_size);
39 if (error)
40 return error;
41 }
42
25 /* If force is set do it anyway. */ 43 /* If force is set do it anyway. */
26 if (ia_valid & ATTR_FORCE) 44 if (ia_valid & ATTR_FORCE)
27 goto fine; 45 return 0;
28 46
29 /* Make sure a caller can chown. */ 47 /* Make sure a caller can chown. */
30 if ((ia_valid & ATTR_UID) && 48 if ((ia_valid & ATTR_UID) &&
31 (current_fsuid() != inode->i_uid || 49 (current_fsuid() != inode->i_uid ||
32 attr->ia_uid != inode->i_uid) && !capable(CAP_CHOWN)) 50 attr->ia_uid != inode->i_uid) && !capable(CAP_CHOWN))
33 goto error; 51 return -EPERM;
34 52
35 /* Make sure caller can chgrp. */ 53 /* Make sure caller can chgrp. */
36 if ((ia_valid & ATTR_GID) && 54 if ((ia_valid & ATTR_GID) &&
37 (current_fsuid() != inode->i_uid || 55 (current_fsuid() != inode->i_uid ||
38 (!in_group_p(attr->ia_gid) && attr->ia_gid != inode->i_gid)) && 56 (!in_group_p(attr->ia_gid) && attr->ia_gid != inode->i_gid)) &&
39 !capable(CAP_CHOWN)) 57 !capable(CAP_CHOWN))
40 goto error; 58 return -EPERM;
41 59
42 /* Make sure a caller can chmod. */ 60 /* Make sure a caller can chmod. */
43 if (ia_valid & ATTR_MODE) { 61 if (ia_valid & ATTR_MODE) {
44 if (!is_owner_or_cap(inode)) 62 if (!is_owner_or_cap(inode))
45 goto error; 63 return -EPERM;
46 /* Also check the setgid bit! */ 64 /* Also check the setgid bit! */
47 if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid : 65 if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
48 inode->i_gid) && !capable(CAP_FSETID)) 66 inode->i_gid) && !capable(CAP_FSETID))
@@ -52,12 +70,10 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
52 /* Check for setting the inode time. */ 70 /* Check for setting the inode time. */
53 if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) { 71 if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) {
54 if (!is_owner_or_cap(inode)) 72 if (!is_owner_or_cap(inode))
55 goto error; 73 return -EPERM;
56 } 74 }
57fine: 75
58 retval = 0; 76 return 0;
59error:
60 return retval;
61} 77}
62EXPORT_SYMBOL(inode_change_ok); 78EXPORT_SYMBOL(inode_change_ok);
63 79
@@ -105,21 +121,21 @@ out_big:
105EXPORT_SYMBOL(inode_newsize_ok); 121EXPORT_SYMBOL(inode_newsize_ok);
106 122
107/** 123/**
108 * generic_setattr - copy simple metadata updates into the generic inode 124 * setattr_copy - copy simple metadata updates into the generic inode
109 * @inode: the inode to be updated 125 * @inode: the inode to be updated
110 * @attr: the new attributes 126 * @attr: the new attributes
111 * 127 *
112 * generic_setattr must be called with i_mutex held. 128 * setattr_copy must be called with i_mutex held.
113 * 129 *
114 * generic_setattr updates the inode's metadata with that specified 130 * setattr_copy updates the inode's metadata with that specified
115 * in attr. Noticably missing is inode size update, which is more complex 131 * in attr. Noticably missing is inode size update, which is more complex
116 * as it requires pagecache updates. See simple_setsize. 132 * as it requires pagecache updates.
117 * 133 *
118 * The inode is not marked as dirty after this operation. The rationale is 134 * The inode is not marked as dirty after this operation. The rationale is
119 * that for "simple" filesystems, the struct inode is the inode storage. 135 * that for "simple" filesystems, the struct inode is the inode storage.
120 * The caller is free to mark the inode dirty afterwards if needed. 136 * The caller is free to mark the inode dirty afterwards if needed.
121 */ 137 */
122void generic_setattr(struct inode *inode, const struct iattr *attr) 138void setattr_copy(struct inode *inode, const struct iattr *attr)
123{ 139{
124 unsigned int ia_valid = attr->ia_valid; 140 unsigned int ia_valid = attr->ia_valid;
125 141
@@ -144,32 +160,7 @@ void generic_setattr(struct inode *inode, const struct iattr *attr)
144 inode->i_mode = mode; 160 inode->i_mode = mode;
145 } 161 }
146} 162}
147EXPORT_SYMBOL(generic_setattr); 163EXPORT_SYMBOL(setattr_copy);
148
149/*
150 * note this function is deprecated, the new truncate sequence should be
151 * used instead -- see eg. simple_setsize, generic_setattr.
152 */
153int inode_setattr(struct inode *inode, const struct iattr *attr)
154{
155 unsigned int ia_valid = attr->ia_valid;
156
157 if (ia_valid & ATTR_SIZE &&
158 attr->ia_size != i_size_read(inode)) {
159 int error;
160
161 error = vmtruncate(inode, attr->ia_size);
162 if (error)
163 return error;
164 }
165
166 generic_setattr(inode, attr);
167
168 mark_inode_dirty(inode);
169
170 return 0;
171}
172EXPORT_SYMBOL(inode_setattr);
173 164
174int notify_change(struct dentry * dentry, struct iattr * attr) 165int notify_change(struct dentry * dentry, struct iattr * attr)
175{ 166{
@@ -237,13 +228,10 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
237 if (ia_valid & ATTR_SIZE) 228 if (ia_valid & ATTR_SIZE)
238 down_write(&dentry->d_inode->i_alloc_sem); 229 down_write(&dentry->d_inode->i_alloc_sem);
239 230
240 if (inode->i_op && inode->i_op->setattr) { 231 if (inode->i_op->setattr)
241 error = inode->i_op->setattr(dentry, attr); 232 error = inode->i_op->setattr(dentry, attr);
242 } else { 233 else
243 error = inode_change_ok(inode, attr); 234 error = simple_setattr(dentry, attr);
244 if (!error)
245 error = inode_setattr(inode, attr);
246 }
247 235
248 if (ia_valid & ATTR_SIZE) 236 if (ia_valid & ATTR_SIZE)
249 up_write(&dentry->d_inode->i_alloc_sem); 237 up_write(&dentry->d_inode->i_alloc_sem);
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 34ddda888e63..dc39d2824885 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -436,7 +436,7 @@ befs_init_inodecache(void)
436 init_once); 436 init_once);
437 if (befs_inode_cachep == NULL) { 437 if (befs_inode_cachep == NULL) {
438 printk(KERN_ERR "befs_init_inodecache: " 438 printk(KERN_ERR "befs_init_inodecache: "
439 "Couldn't initalize inode slabcache\n"); 439 "Couldn't initialize inode slabcache\n");
440 return -ENOMEM; 440 return -ENOMEM;
441 } 441 }
442 442
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 7109e451abf7..f7f87e233dd9 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -17,7 +17,6 @@ struct bfs_sb_info {
17 unsigned long si_lf_eblk; 17 unsigned long si_lf_eblk;
18 unsigned long si_lasti; 18 unsigned long si_lasti;
19 unsigned long *si_imap; 19 unsigned long *si_imap;
20 struct buffer_head *si_sbh; /* buffer header w/superblock */
21 struct mutex bfs_lock; 20 struct mutex bfs_lock;
22}; 21};
23 22
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index 88b9a3ff44e4..eb67edd0f8ea 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -70,7 +70,6 @@ static int bfs_get_block(struct inode *inode, sector_t block,
70 struct super_block *sb = inode->i_sb; 70 struct super_block *sb = inode->i_sb;
71 struct bfs_sb_info *info = BFS_SB(sb); 71 struct bfs_sb_info *info = BFS_SB(sb);
72 struct bfs_inode_info *bi = BFS_I(inode); 72 struct bfs_inode_info *bi = BFS_I(inode);
73 struct buffer_head *sbh = info->si_sbh;
74 73
75 phys = bi->i_sblock + block; 74 phys = bi->i_sblock + block;
76 if (!create) { 75 if (!create) {
@@ -112,7 +111,6 @@ static int bfs_get_block(struct inode *inode, sector_t block,
112 info->si_freeb -= phys - bi->i_eblock; 111 info->si_freeb -= phys - bi->i_eblock;
113 info->si_lf_eblk = bi->i_eblock = phys; 112 info->si_lf_eblk = bi->i_eblock = phys;
114 mark_inode_dirty(inode); 113 mark_inode_dirty(inode);
115 mark_buffer_dirty(sbh);
116 err = 0; 114 err = 0;
117 goto out; 115 goto out;
118 } 116 }
@@ -147,7 +145,6 @@ static int bfs_get_block(struct inode *inode, sector_t block,
147 */ 145 */
148 info->si_freeb -= bi->i_eblock - bi->i_sblock + 1 - inode->i_blocks; 146 info->si_freeb -= bi->i_eblock - bi->i_sblock + 1 - inode->i_blocks;
149 mark_inode_dirty(inode); 147 mark_inode_dirty(inode);
150 mark_buffer_dirty(sbh);
151 map_bh(bh_result, sb, phys); 148 map_bh(bh_result, sb, phys);
152out: 149out:
153 mutex_unlock(&info->bfs_lock); 150 mutex_unlock(&info->bfs_lock);
@@ -168,9 +165,17 @@ static int bfs_write_begin(struct file *file, struct address_space *mapping,
168 loff_t pos, unsigned len, unsigned flags, 165 loff_t pos, unsigned len, unsigned flags,
169 struct page **pagep, void **fsdata) 166 struct page **pagep, void **fsdata)
170{ 167{
171 *pagep = NULL; 168 int ret;
172 return block_write_begin(file, mapping, pos, len, flags, 169
173 pagep, fsdata, bfs_get_block); 170 ret = block_write_begin(mapping, pos, len, flags, pagep,
171 bfs_get_block);
172 if (unlikely(ret)) {
173 loff_t isize = mapping->host->i_size;
174 if (pos + len > isize)
175 vmtruncate(mapping->host, isize);
176 }
177
178 return ret;
174} 179}
175 180
176static sector_t bfs_bmap(struct address_space *mapping, sector_t block) 181static sector_t bfs_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index f22a7d3dc362..c4daf0f5fc02 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -31,7 +31,6 @@ MODULE_LICENSE("GPL");
31#define dprintf(x...) 31#define dprintf(x...)
32#endif 32#endif
33 33
34static void bfs_write_super(struct super_block *s);
35void dump_imap(const char *prefix, struct super_block *s); 34void dump_imap(const char *prefix, struct super_block *s);
36 35
37struct inode *bfs_iget(struct super_block *sb, unsigned long ino) 36struct inode *bfs_iget(struct super_block *sb, unsigned long ino)
@@ -99,6 +98,24 @@ error:
99 return ERR_PTR(-EIO); 98 return ERR_PTR(-EIO);
100} 99}
101 100
101static struct bfs_inode *find_inode(struct super_block *sb, u16 ino, struct buffer_head **p)
102{
103 if ((ino < BFS_ROOT_INO) || (ino > BFS_SB(sb)->si_lasti)) {
104 printf("Bad inode number %s:%08x\n", sb->s_id, ino);
105 return ERR_PTR(-EIO);
106 }
107
108 ino -= BFS_ROOT_INO;
109
110 *p = sb_bread(sb, 1 + ino / BFS_INODES_PER_BLOCK);
111 if (!*p) {
112 printf("Unable to read inode %s:%08x\n", sb->s_id, ino);
113 return ERR_PTR(-EIO);
114 }
115
116 return (struct bfs_inode *)(*p)->b_data + ino % BFS_INODES_PER_BLOCK;
117}
118
102static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc) 119static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc)
103{ 120{
104 struct bfs_sb_info *info = BFS_SB(inode->i_sb); 121 struct bfs_sb_info *info = BFS_SB(inode->i_sb);
@@ -106,28 +123,15 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc)
106 unsigned long i_sblock; 123 unsigned long i_sblock;
107 struct bfs_inode *di; 124 struct bfs_inode *di;
108 struct buffer_head *bh; 125 struct buffer_head *bh;
109 int block, off;
110 int err = 0; 126 int err = 0;
111 127
112 dprintf("ino=%08x\n", ino); 128 dprintf("ino=%08x\n", ino);
113 129
114 if ((ino < BFS_ROOT_INO) || (ino > BFS_SB(inode->i_sb)->si_lasti)) { 130 di = find_inode(inode->i_sb, ino, &bh);
115 printf("Bad inode number %s:%08x\n", inode->i_sb->s_id, ino); 131 if (IS_ERR(di))
116 return -EIO; 132 return PTR_ERR(di);
117 }
118 133
119 mutex_lock(&info->bfs_lock); 134 mutex_lock(&info->bfs_lock);
120 block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1;
121 bh = sb_bread(inode->i_sb, block);
122 if (!bh) {
123 printf("Unable to read inode %s:%08x\n",
124 inode->i_sb->s_id, ino);
125 mutex_unlock(&info->bfs_lock);
126 return -EIO;
127 }
128
129 off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK;
130 di = (struct bfs_inode *)bh->b_data + off;
131 135
132 if (ino == BFS_ROOT_INO) 136 if (ino == BFS_ROOT_INO)
133 di->i_vtype = cpu_to_le32(BFS_VDIR); 137 di->i_vtype = cpu_to_le32(BFS_VDIR);
@@ -158,12 +162,11 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc)
158 return err; 162 return err;
159} 163}
160 164
161static void bfs_delete_inode(struct inode *inode) 165static void bfs_evict_inode(struct inode *inode)
162{ 166{
163 unsigned long ino = inode->i_ino; 167 unsigned long ino = inode->i_ino;
164 struct bfs_inode *di; 168 struct bfs_inode *di;
165 struct buffer_head *bh; 169 struct buffer_head *bh;
166 int block, off;
167 struct super_block *s = inode->i_sb; 170 struct super_block *s = inode->i_sb;
168 struct bfs_sb_info *info = BFS_SB(s); 171 struct bfs_sb_info *info = BFS_SB(s);
169 struct bfs_inode_info *bi = BFS_I(inode); 172 struct bfs_inode_info *bi = BFS_I(inode);
@@ -171,28 +174,19 @@ static void bfs_delete_inode(struct inode *inode)
171 dprintf("ino=%08lx\n", ino); 174 dprintf("ino=%08lx\n", ino);
172 175
173 truncate_inode_pages(&inode->i_data, 0); 176 truncate_inode_pages(&inode->i_data, 0);
177 invalidate_inode_buffers(inode);
178 end_writeback(inode);
174 179
175 if ((ino < BFS_ROOT_INO) || (ino > info->si_lasti)) { 180 if (inode->i_nlink)
176 printf("invalid ino=%08lx\n", ino);
177 return; 181 return;
178 }
179
180 inode->i_size = 0;
181 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
182 mutex_lock(&info->bfs_lock);
183 mark_inode_dirty(inode);
184 182
185 block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1; 183 di = find_inode(s, inode->i_ino, &bh);
186 bh = sb_bread(s, block); 184 if (IS_ERR(di))
187 if (!bh) {
188 printf("Unable to read inode %s:%08lx\n",
189 inode->i_sb->s_id, ino);
190 mutex_unlock(&info->bfs_lock);
191 return; 185 return;
192 } 186
193 off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; 187 mutex_lock(&info->bfs_lock);
194 di = (struct bfs_inode *)bh->b_data + off; 188 /* clear on-disk inode */
195 memset((void *)di, 0, sizeof(struct bfs_inode)); 189 memset(di, 0, sizeof(struct bfs_inode));
196 mark_buffer_dirty(bh); 190 mark_buffer_dirty(bh);
197 brelse(bh); 191 brelse(bh);
198 192
@@ -209,32 +203,9 @@ static void bfs_delete_inode(struct inode *inode)
209 * "last block of the last file" even if there is no 203 * "last block of the last file" even if there is no
210 * real file there, saves us 1 gap. 204 * real file there, saves us 1 gap.
211 */ 205 */
212 if (info->si_lf_eblk == bi->i_eblock) { 206 if (info->si_lf_eblk == bi->i_eblock)
213 info->si_lf_eblk = bi->i_sblock - 1; 207 info->si_lf_eblk = bi->i_sblock - 1;
214 mark_buffer_dirty(info->si_sbh);
215 }
216 mutex_unlock(&info->bfs_lock); 208 mutex_unlock(&info->bfs_lock);
217 clear_inode(inode);
218}
219
220static int bfs_sync_fs(struct super_block *sb, int wait)
221{
222 struct bfs_sb_info *info = BFS_SB(sb);
223
224 mutex_lock(&info->bfs_lock);
225 mark_buffer_dirty(info->si_sbh);
226 sb->s_dirt = 0;
227 mutex_unlock(&info->bfs_lock);
228
229 return 0;
230}
231
232static void bfs_write_super(struct super_block *sb)
233{
234 if (!(sb->s_flags & MS_RDONLY))
235 bfs_sync_fs(sb, 1);
236 else
237 sb->s_dirt = 0;
238} 209}
239 210
240static void bfs_put_super(struct super_block *s) 211static void bfs_put_super(struct super_block *s)
@@ -246,10 +217,6 @@ static void bfs_put_super(struct super_block *s)
246 217
247 lock_kernel(); 218 lock_kernel();
248 219
249 if (s->s_dirt)
250 bfs_write_super(s);
251
252 brelse(info->si_sbh);
253 mutex_destroy(&info->bfs_lock); 220 mutex_destroy(&info->bfs_lock);
254 kfree(info->si_imap); 221 kfree(info->si_imap);
255 kfree(info); 222 kfree(info);
@@ -319,10 +286,8 @@ static const struct super_operations bfs_sops = {
319 .alloc_inode = bfs_alloc_inode, 286 .alloc_inode = bfs_alloc_inode,
320 .destroy_inode = bfs_destroy_inode, 287 .destroy_inode = bfs_destroy_inode,
321 .write_inode = bfs_write_inode, 288 .write_inode = bfs_write_inode,
322 .delete_inode = bfs_delete_inode, 289 .evict_inode = bfs_evict_inode,
323 .put_super = bfs_put_super, 290 .put_super = bfs_put_super,
324 .write_super = bfs_write_super,
325 .sync_fs = bfs_sync_fs,
326 .statfs = bfs_statfs, 291 .statfs = bfs_statfs,
327}; 292};
328 293
@@ -349,7 +314,7 @@ void dump_imap(const char *prefix, struct super_block *s)
349 314
350static int bfs_fill_super(struct super_block *s, void *data, int silent) 315static int bfs_fill_super(struct super_block *s, void *data, int silent)
351{ 316{
352 struct buffer_head *bh; 317 struct buffer_head *bh, *sbh;
353 struct bfs_super_block *bfs_sb; 318 struct bfs_super_block *bfs_sb;
354 struct inode *inode; 319 struct inode *inode;
355 unsigned i, imap_len; 320 unsigned i, imap_len;
@@ -365,10 +330,10 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
365 330
366 sb_set_blocksize(s, BFS_BSIZE); 331 sb_set_blocksize(s, BFS_BSIZE);
367 332
368 info->si_sbh = sb_bread(s, 0); 333 sbh = sb_bread(s, 0);
369 if (!info->si_sbh) 334 if (!sbh)
370 goto out; 335 goto out;
371 bfs_sb = (struct bfs_super_block *)info->si_sbh->b_data; 336 bfs_sb = (struct bfs_super_block *)sbh->b_data;
372 if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) { 337 if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) {
373 if (!silent) 338 if (!silent)
374 printf("No BFS filesystem on %s (magic=%08x)\n", 339 printf("No BFS filesystem on %s (magic=%08x)\n",
@@ -472,10 +437,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
472 info->si_lf_eblk = eblock; 437 info->si_lf_eblk = eblock;
473 } 438 }
474 brelse(bh); 439 brelse(bh);
475 if (!(s->s_flags & MS_RDONLY)) { 440 brelse(sbh);
476 mark_buffer_dirty(info->si_sbh);
477 s->s_dirt = 1;
478 }
479 dump_imap("read_super", s); 441 dump_imap("read_super", s);
480 return 0; 442 return 0;
481 443
@@ -485,7 +447,7 @@ out3:
485out2: 447out2:
486 kfree(info->si_imap); 448 kfree(info->si_imap);
487out1: 449out1:
488 brelse(info->si_sbh); 450 brelse(sbh);
489out: 451out:
490 mutex_destroy(&info->bfs_lock); 452 mutex_destroy(&info->bfs_lock);
491 kfree(info); 453 kfree(info);
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index c4e83537ead7..9e60fd201716 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -502,8 +502,9 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
502 return inode; 502 return inode;
503} 503}
504 504
505static void bm_clear_inode(struct inode *inode) 505static void bm_evict_inode(struct inode *inode)
506{ 506{
507 end_writeback(inode);
507 kfree(inode->i_private); 508 kfree(inode->i_private);
508} 509}
509 510
@@ -685,7 +686,7 @@ static const struct file_operations bm_status_operations = {
685 686
686static const struct super_operations s_ops = { 687static const struct super_operations s_ops = {
687 .statfs = simple_statfs, 688 .statfs = simple_statfs,
688 .clear_inode = bm_clear_inode, 689 .evict_inode = bm_evict_inode,
689}; 690};
690 691
691static int bm_fill_super(struct super_block * sb, void * data, int silent) 692static int bm_fill_super(struct super_block * sb, void * data, int silent)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 99d6af811747..451afbd543b5 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -172,9 +172,8 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
172 struct file *file = iocb->ki_filp; 172 struct file *file = iocb->ki_filp;
173 struct inode *inode = file->f_mapping->host; 173 struct inode *inode = file->f_mapping->host;
174 174
175 return blockdev_direct_IO_no_locking_newtrunc(rw, iocb, inode, 175 return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset,
176 I_BDEV(inode), iov, offset, nr_segs, 176 nr_segs, blkdev_get_blocks, NULL, NULL, 0);
177 blkdev_get_blocks, NULL);
178} 177}
179 178
180int __sync_blockdev(struct block_device *bdev, int wait) 179int __sync_blockdev(struct block_device *bdev, int wait)
@@ -309,9 +308,8 @@ static int blkdev_write_begin(struct file *file, struct address_space *mapping,
309 loff_t pos, unsigned len, unsigned flags, 308 loff_t pos, unsigned len, unsigned flags,
310 struct page **pagep, void **fsdata) 309 struct page **pagep, void **fsdata)
311{ 310{
312 *pagep = NULL; 311 return block_write_begin(mapping, pos, len, flags, pagep,
313 return block_write_begin_newtrunc(file, mapping, pos, len, flags, 312 blkdev_get_block);
314 pagep, fsdata, blkdev_get_block);
315} 313}
316 314
317static int blkdev_write_end(struct file *file, struct address_space *mapping, 315static int blkdev_write_end(struct file *file, struct address_space *mapping,
@@ -428,10 +426,13 @@ static inline void __bd_forget(struct inode *inode)
428 inode->i_mapping = &inode->i_data; 426 inode->i_mapping = &inode->i_data;
429} 427}
430 428
431static void bdev_clear_inode(struct inode *inode) 429static void bdev_evict_inode(struct inode *inode)
432{ 430{
433 struct block_device *bdev = &BDEV_I(inode)->bdev; 431 struct block_device *bdev = &BDEV_I(inode)->bdev;
434 struct list_head *p; 432 struct list_head *p;
433 truncate_inode_pages(&inode->i_data, 0);
434 invalidate_inode_buffers(inode); /* is it needed here? */
435 end_writeback(inode);
435 spin_lock(&bdev_lock); 436 spin_lock(&bdev_lock);
436 while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) { 437 while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
437 __bd_forget(list_entry(p, struct inode, i_devices)); 438 __bd_forget(list_entry(p, struct inode, i_devices));
@@ -445,7 +446,7 @@ static const struct super_operations bdev_sops = {
445 .alloc_inode = bdev_alloc_inode, 446 .alloc_inode = bdev_alloc_inode,
446 .destroy_inode = bdev_destroy_inode, 447 .destroy_inode = bdev_destroy_inode,
447 .drop_inode = generic_delete_inode, 448 .drop_inode = generic_delete_inode,
448 .clear_inode = bdev_clear_inode, 449 .evict_inode = bdev_evict_inode,
449}; 450};
450 451
451static int bd_get_sb(struct file_system_type *fs_type, 452static int bd_get_sb(struct file_system_type *fs_type,
@@ -681,8 +682,8 @@ retry:
681 if (!bd_may_claim(bdev, whole, holder)) 682 if (!bd_may_claim(bdev, whole, holder))
682 return -EBUSY; 683 return -EBUSY;
683 684
684 /* if someone else is claiming, wait for it to finish */ 685 /* if claiming is already in progress, wait for it to finish */
685 if (whole->bd_claiming && whole->bd_claiming != holder) { 686 if (whole->bd_claiming) {
686 wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0); 687 wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
687 DEFINE_WAIT(wait); 688 DEFINE_WAIT(wait);
688 689
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 29c20092847e..eaf286abad17 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2389,13 +2389,13 @@ unsigned long btrfs_force_ra(struct address_space *mapping,
2389 pgoff_t offset, pgoff_t last_index); 2389 pgoff_t offset, pgoff_t last_index);
2390int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); 2390int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
2391int btrfs_readpage(struct file *file, struct page *page); 2391int btrfs_readpage(struct file *file, struct page *page);
2392void btrfs_delete_inode(struct inode *inode); 2392void btrfs_evict_inode(struct inode *inode);
2393void btrfs_put_inode(struct inode *inode); 2393void btrfs_put_inode(struct inode *inode);
2394int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); 2394int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
2395void btrfs_dirty_inode(struct inode *inode); 2395void btrfs_dirty_inode(struct inode *inode);
2396struct inode *btrfs_alloc_inode(struct super_block *sb); 2396struct inode *btrfs_alloc_inode(struct super_block *sb);
2397void btrfs_destroy_inode(struct inode *inode); 2397void btrfs_destroy_inode(struct inode *inode);
2398void btrfs_drop_inode(struct inode *inode); 2398int btrfs_drop_inode(struct inode *inode);
2399int btrfs_init_cachep(void); 2399int btrfs_init_cachep(void);
2400void btrfs_destroy_cachep(void); 2400void btrfs_destroy_cachep(void);
2401long btrfs_ioctl_trans_end(struct file *file); 2401long btrfs_ioctl_trans_end(struct file *file);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1bff92ad4744..8976c3343a96 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2938,7 +2938,6 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
2938 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 2938 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
2939 ret = btrfs_update_inode(trans, root, dir); 2939 ret = btrfs_update_inode(trans, root, dir);
2940 BUG_ON(ret); 2940 BUG_ON(ret);
2941 dir->i_sb->s_dirt = 1;
2942 2941
2943 btrfs_free_path(path); 2942 btrfs_free_path(path);
2944 return 0; 2943 return 0;
@@ -3656,17 +3655,19 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3656 if (err) 3655 if (err)
3657 return err; 3656 return err;
3658 } 3657 }
3659 attr->ia_valid &= ~ATTR_SIZE;
3660 3658
3661 if (attr->ia_valid) 3659 if (attr->ia_valid) {
3662 err = inode_setattr(inode, attr); 3660 setattr_copy(inode, attr);
3661 mark_inode_dirty(inode);
3662
3663 if (attr->ia_valid & ATTR_MODE)
3664 err = btrfs_acl_chmod(inode);
3665 }
3663 3666
3664 if (!err && ((attr->ia_valid & ATTR_MODE)))
3665 err = btrfs_acl_chmod(inode);
3666 return err; 3667 return err;
3667} 3668}
3668 3669
3669void btrfs_delete_inode(struct inode *inode) 3670void btrfs_evict_inode(struct inode *inode)
3670{ 3671{
3671 struct btrfs_trans_handle *trans; 3672 struct btrfs_trans_handle *trans;
3672 struct btrfs_root *root = BTRFS_I(inode)->root; 3673 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -3674,10 +3675,14 @@ void btrfs_delete_inode(struct inode *inode)
3674 int ret; 3675 int ret;
3675 3676
3676 truncate_inode_pages(&inode->i_data, 0); 3677 truncate_inode_pages(&inode->i_data, 0);
3678 if (inode->i_nlink && btrfs_root_refs(&root->root_item) != 0)
3679 goto no_delete;
3680
3677 if (is_bad_inode(inode)) { 3681 if (is_bad_inode(inode)) {
3678 btrfs_orphan_del(NULL, inode); 3682 btrfs_orphan_del(NULL, inode);
3679 goto no_delete; 3683 goto no_delete;
3680 } 3684 }
3685 /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */
3681 btrfs_wait_ordered_range(inode, 0, (u64)-1); 3686 btrfs_wait_ordered_range(inode, 0, (u64)-1);
3682 3687
3683 if (root->fs_info->log_root_recovering) { 3688 if (root->fs_info->log_root_recovering) {
@@ -3727,7 +3732,7 @@ void btrfs_delete_inode(struct inode *inode)
3727 btrfs_end_transaction(trans, root); 3732 btrfs_end_transaction(trans, root);
3728 btrfs_btree_balance_dirty(root, nr); 3733 btrfs_btree_balance_dirty(root, nr);
3729no_delete: 3734no_delete:
3730 clear_inode(inode); 3735 end_writeback(inode);
3731 return; 3736 return;
3732} 3737}
3733 3738
@@ -3858,7 +3863,7 @@ again:
3858 p = &parent->rb_right; 3863 p = &parent->rb_right;
3859 else { 3864 else {
3860 WARN_ON(!(entry->vfs_inode.i_state & 3865 WARN_ON(!(entry->vfs_inode.i_state &
3861 (I_WILL_FREE | I_FREEING | I_CLEAR))); 3866 (I_WILL_FREE | I_FREEING)));
3862 rb_erase(parent, &root->inode_tree); 3867 rb_erase(parent, &root->inode_tree);
3863 RB_CLEAR_NODE(parent); 3868 RB_CLEAR_NODE(parent);
3864 spin_unlock(&root->inode_lock); 3869 spin_unlock(&root->inode_lock);
@@ -3937,7 +3942,7 @@ again:
3937 if (atomic_read(&inode->i_count) > 1) 3942 if (atomic_read(&inode->i_count) > 1)
3938 d_prune_aliases(inode); 3943 d_prune_aliases(inode);
3939 /* 3944 /*
3940 * btrfs_drop_inode will remove it from 3945 * btrfs_drop_inode will have it removed from
3941 * the inode cache when its usage count 3946 * the inode cache when its usage count
3942 * hits zero. 3947 * hits zero.
3943 */ 3948 */
@@ -6331,13 +6336,14 @@ free:
6331 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); 6336 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
6332} 6337}
6333 6338
6334void btrfs_drop_inode(struct inode *inode) 6339int btrfs_drop_inode(struct inode *inode)
6335{ 6340{
6336 struct btrfs_root *root = BTRFS_I(inode)->root; 6341 struct btrfs_root *root = BTRFS_I(inode)->root;
6337 if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0) 6342
6338 generic_delete_inode(inode); 6343 if (btrfs_root_refs(&root->root_item) == 0)
6344 return 1;
6339 else 6345 else
6340 generic_drop_inode(inode); 6346 return generic_drop_inode(inode);
6341} 6347}
6342 6348
6343static void init_once(void *foo) 6349static void init_once(void *foo)
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index f2393b390318..1776dbd8dc98 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -797,7 +797,7 @@ static int btrfs_unfreeze(struct super_block *sb)
797 797
798static const struct super_operations btrfs_super_ops = { 798static const struct super_operations btrfs_super_ops = {
799 .drop_inode = btrfs_drop_inode, 799 .drop_inode = btrfs_drop_inode,
800 .delete_inode = btrfs_delete_inode, 800 .evict_inode = btrfs_evict_inode,
801 .put_super = btrfs_put_super, 801 .put_super = btrfs_put_super,
802 .sync_fs = btrfs_sync_fs, 802 .sync_fs = btrfs_sync_fs,
803 .show_options = btrfs_show_options, 803 .show_options = btrfs_show_options,
diff --git a/fs/buffer.c b/fs/buffer.c
index d54812b198e9..50efa339e051 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1833,9 +1833,10 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
1833} 1833}
1834EXPORT_SYMBOL(page_zero_new_buffers); 1834EXPORT_SYMBOL(page_zero_new_buffers);
1835 1835
1836static int __block_prepare_write(struct inode *inode, struct page *page, 1836int block_prepare_write(struct page *page, unsigned from, unsigned to,
1837 unsigned from, unsigned to, get_block_t *get_block) 1837 get_block_t *get_block)
1838{ 1838{
1839 struct inode *inode = page->mapping->host;
1839 unsigned block_start, block_end; 1840 unsigned block_start, block_end;
1840 sector_t block; 1841 sector_t block;
1841 int err = 0; 1842 int err = 0;
@@ -1908,10 +1909,13 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
1908 if (!buffer_uptodate(*wait_bh)) 1909 if (!buffer_uptodate(*wait_bh))
1909 err = -EIO; 1910 err = -EIO;
1910 } 1911 }
1911 if (unlikely(err)) 1912 if (unlikely(err)) {
1912 page_zero_new_buffers(page, from, to); 1913 page_zero_new_buffers(page, from, to);
1914 ClearPageUptodate(page);
1915 }
1913 return err; 1916 return err;
1914} 1917}
1918EXPORT_SYMBOL(block_prepare_write);
1915 1919
1916static int __block_commit_write(struct inode *inode, struct page *page, 1920static int __block_commit_write(struct inode *inode, struct page *page,
1917 unsigned from, unsigned to) 1921 unsigned from, unsigned to)
@@ -1948,90 +1952,41 @@ static int __block_commit_write(struct inode *inode, struct page *page,
1948 return 0; 1952 return 0;
1949} 1953}
1950 1954
1951/* 1955int __block_write_begin(struct page *page, loff_t pos, unsigned len,
1952 * Filesystems implementing the new truncate sequence should use the 1956 get_block_t *get_block)
1953 * _newtrunc postfix variant which won't incorrectly call vmtruncate.
1954 * The filesystem needs to handle block truncation upon failure.
1955 */
1956int block_write_begin_newtrunc(struct file *file, struct address_space *mapping,
1957 loff_t pos, unsigned len, unsigned flags,
1958 struct page **pagep, void **fsdata,
1959 get_block_t *get_block)
1960{ 1957{
1961 struct inode *inode = mapping->host; 1958 unsigned start = pos & (PAGE_CACHE_SIZE - 1);
1962 int status = 0;
1963 struct page *page;
1964 pgoff_t index;
1965 unsigned start, end;
1966 int ownpage = 0;
1967
1968 index = pos >> PAGE_CACHE_SHIFT;
1969 start = pos & (PAGE_CACHE_SIZE - 1);
1970 end = start + len;
1971
1972 page = *pagep;
1973 if (page == NULL) {
1974 ownpage = 1;
1975 page = grab_cache_page_write_begin(mapping, index, flags);
1976 if (!page) {
1977 status = -ENOMEM;
1978 goto out;
1979 }
1980 *pagep = page;
1981 } else
1982 BUG_ON(!PageLocked(page));
1983
1984 status = __block_prepare_write(inode, page, start, end, get_block);
1985 if (unlikely(status)) {
1986 ClearPageUptodate(page);
1987 1959
1988 if (ownpage) { 1960 return block_prepare_write(page, start, start + len, get_block);
1989 unlock_page(page);
1990 page_cache_release(page);
1991 *pagep = NULL;
1992 }
1993 }
1994
1995out:
1996 return status;
1997} 1961}
1998EXPORT_SYMBOL(block_write_begin_newtrunc); 1962EXPORT_SYMBOL(__block_write_begin);
1999 1963
2000/* 1964/*
2001 * block_write_begin takes care of the basic task of block allocation and 1965 * block_write_begin takes care of the basic task of block allocation and
2002 * bringing partial write blocks uptodate first. 1966 * bringing partial write blocks uptodate first.
2003 * 1967 *
2004 * If *pagep is not NULL, then block_write_begin uses the locked page 1968 * The filesystem needs to handle block truncation upon failure.
2005 * at *pagep rather than allocating its own. In this case, the page will
2006 * not be unlocked or deallocated on failure.
2007 */ 1969 */
2008int block_write_begin(struct file *file, struct address_space *mapping, 1970int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
2009 loff_t pos, unsigned len, unsigned flags, 1971 unsigned flags, struct page **pagep, get_block_t *get_block)
2010 struct page **pagep, void **fsdata,
2011 get_block_t *get_block)
2012{ 1972{
2013 int ret; 1973 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
1974 struct page *page;
1975 int status;
2014 1976
2015 ret = block_write_begin_newtrunc(file, mapping, pos, len, flags, 1977 page = grab_cache_page_write_begin(mapping, index, flags);
2016 pagep, fsdata, get_block); 1978 if (!page)
1979 return -ENOMEM;
2017 1980
2018 /* 1981 status = __block_write_begin(page, pos, len, get_block);
2019 * prepare_write() may have instantiated a few blocks 1982 if (unlikely(status)) {
2020 * outside i_size. Trim these off again. Don't need 1983 unlock_page(page);
2021 * i_size_read because we hold i_mutex. 1984 page_cache_release(page);
2022 * 1985 page = NULL;
2023 * Filesystems which pass down their own page also cannot
2024 * call into vmtruncate here because it would lead to lock
2025 * inversion problems (*pagep is locked). This is a further
2026 * example of where the old truncate sequence is inadequate.
2027 */
2028 if (unlikely(ret) && *pagep == NULL) {
2029 loff_t isize = mapping->host->i_size;
2030 if (pos + len > isize)
2031 vmtruncate(mapping->host, isize);
2032 } 1986 }
2033 1987
2034 return ret; 1988 *pagep = page;
1989 return status;
2035} 1990}
2036EXPORT_SYMBOL(block_write_begin); 1991EXPORT_SYMBOL(block_write_begin);
2037 1992
@@ -2351,7 +2306,7 @@ out:
2351 * For moronic filesystems that do not allow holes in file. 2306 * For moronic filesystems that do not allow holes in file.
2352 * We may have to extend the file. 2307 * We may have to extend the file.
2353 */ 2308 */
2354int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping, 2309int cont_write_begin(struct file *file, struct address_space *mapping,
2355 loff_t pos, unsigned len, unsigned flags, 2310 loff_t pos, unsigned len, unsigned flags,
2356 struct page **pagep, void **fsdata, 2311 struct page **pagep, void **fsdata,
2357 get_block_t *get_block, loff_t *bytes) 2312 get_block_t *get_block, loff_t *bytes)
@@ -2363,7 +2318,7 @@ int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
2363 2318
2364 err = cont_expand_zero(file, mapping, pos, bytes); 2319 err = cont_expand_zero(file, mapping, pos, bytes);
2365 if (err) 2320 if (err)
2366 goto out; 2321 return err;
2367 2322
2368 zerofrom = *bytes & ~PAGE_CACHE_MASK; 2323 zerofrom = *bytes & ~PAGE_CACHE_MASK;
2369 if (pos+len > *bytes && zerofrom & (blocksize-1)) { 2324 if (pos+len > *bytes && zerofrom & (blocksize-1)) {
@@ -2371,44 +2326,10 @@ int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
2371 (*bytes)++; 2326 (*bytes)++;
2372 } 2327 }
2373 2328
2374 *pagep = NULL; 2329 return block_write_begin(mapping, pos, len, flags, pagep, get_block);
2375 err = block_write_begin_newtrunc(file, mapping, pos, len,
2376 flags, pagep, fsdata, get_block);
2377out:
2378 return err;
2379}
2380EXPORT_SYMBOL(cont_write_begin_newtrunc);
2381
2382int cont_write_begin(struct file *file, struct address_space *mapping,
2383 loff_t pos, unsigned len, unsigned flags,
2384 struct page **pagep, void **fsdata,
2385 get_block_t *get_block, loff_t *bytes)
2386{
2387 int ret;
2388
2389 ret = cont_write_begin_newtrunc(file, mapping, pos, len, flags,
2390 pagep, fsdata, get_block, bytes);
2391 if (unlikely(ret)) {
2392 loff_t isize = mapping->host->i_size;
2393 if (pos + len > isize)
2394 vmtruncate(mapping->host, isize);
2395 }
2396
2397 return ret;
2398} 2330}
2399EXPORT_SYMBOL(cont_write_begin); 2331EXPORT_SYMBOL(cont_write_begin);
2400 2332
2401int block_prepare_write(struct page *page, unsigned from, unsigned to,
2402 get_block_t *get_block)
2403{
2404 struct inode *inode = page->mapping->host;
2405 int err = __block_prepare_write(inode, page, from, to, get_block);
2406 if (err)
2407 ClearPageUptodate(page);
2408 return err;
2409}
2410EXPORT_SYMBOL(block_prepare_write);
2411
2412int block_commit_write(struct page *page, unsigned from, unsigned to) 2333int block_commit_write(struct page *page, unsigned from, unsigned to)
2413{ 2334{
2414 struct inode *inode = page->mapping->host; 2335 struct inode *inode = page->mapping->host;
@@ -2510,11 +2431,11 @@ static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
2510} 2431}
2511 2432
2512/* 2433/*
2513 * Filesystems implementing the new truncate sequence should use the 2434 * On entry, the page is fully not uptodate.
2514 * _newtrunc postfix variant which won't incorrectly call vmtruncate. 2435 * On exit the page is fully uptodate in the areas outside (from,to)
2515 * The filesystem needs to handle block truncation upon failure. 2436 * The filesystem needs to handle block truncation upon failure.
2516 */ 2437 */
2517int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping, 2438int nobh_write_begin(struct address_space *mapping,
2518 loff_t pos, unsigned len, unsigned flags, 2439 loff_t pos, unsigned len, unsigned flags,
2519 struct page **pagep, void **fsdata, 2440 struct page **pagep, void **fsdata,
2520 get_block_t *get_block) 2441 get_block_t *get_block)
@@ -2547,8 +2468,8 @@ int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping,
2547 unlock_page(page); 2468 unlock_page(page);
2548 page_cache_release(page); 2469 page_cache_release(page);
2549 *pagep = NULL; 2470 *pagep = NULL;
2550 return block_write_begin_newtrunc(file, mapping, pos, len, 2471 return block_write_begin(mapping, pos, len, flags, pagep,
2551 flags, pagep, fsdata, get_block); 2472 get_block);
2552 } 2473 }
2553 2474
2554 if (PageMappedToDisk(page)) 2475 if (PageMappedToDisk(page))
@@ -2654,35 +2575,6 @@ out_release:
2654 2575
2655 return ret; 2576 return ret;
2656} 2577}
2657EXPORT_SYMBOL(nobh_write_begin_newtrunc);
2658
2659/*
2660 * On entry, the page is fully not uptodate.
2661 * On exit the page is fully uptodate in the areas outside (from,to)
2662 */
2663int nobh_write_begin(struct file *file, struct address_space *mapping,
2664 loff_t pos, unsigned len, unsigned flags,
2665 struct page **pagep, void **fsdata,
2666 get_block_t *get_block)
2667{
2668 int ret;
2669
2670 ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags,
2671 pagep, fsdata, get_block);
2672
2673 /*
2674 * prepare_write() may have instantiated a few blocks
2675 * outside i_size. Trim these off again. Don't need
2676 * i_size_read because we hold i_mutex.
2677 */
2678 if (unlikely(ret)) {
2679 loff_t isize = mapping->host->i_size;
2680 if (pos + len > isize)
2681 vmtruncate(mapping->host, isize);
2682 }
2683
2684 return ret;
2685}
2686EXPORT_SYMBOL(nobh_write_begin); 2578EXPORT_SYMBOL(nobh_write_begin);
2687 2579
2688int nobh_write_end(struct file *file, struct address_space *mapping, 2580int nobh_write_end(struct file *file, struct address_space *mapping,
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index 2906077ac798..a2603e7c0bb5 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -146,7 +146,7 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
146 goto error_unsupported; 146 goto error_unsupported;
147 147
148 /* get the cache size and blocksize */ 148 /* get the cache size and blocksize */
149 ret = vfs_statfs(root, &stats); 149 ret = vfs_statfs(&path, &stats);
150 if (ret < 0) 150 if (ret < 0)
151 goto error_unsupported; 151 goto error_unsupported;
152 152
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
index c2413561ea75..24eb0d37241a 100644
--- a/fs/cachefiles/daemon.c
+++ b/fs/cachefiles/daemon.c
@@ -683,6 +683,10 @@ int cachefiles_has_space(struct cachefiles_cache *cache,
683 unsigned fnr, unsigned bnr) 683 unsigned fnr, unsigned bnr)
684{ 684{
685 struct kstatfs stats; 685 struct kstatfs stats;
686 struct path path = {
687 .mnt = cache->mnt,
688 .dentry = cache->mnt->mnt_root,
689 };
686 int ret; 690 int ret;
687 691
688 //_enter("{%llu,%llu,%llu,%llu,%llu,%llu},%u,%u", 692 //_enter("{%llu,%llu,%llu,%llu,%llu,%llu},%u,%u",
@@ -697,7 +701,7 @@ int cachefiles_has_space(struct cachefiles_cache *cache,
697 /* find out how many pages of blockdev are available */ 701 /* find out how many pages of blockdev are available */
698 memset(&stats, 0, sizeof(stats)); 702 memset(&stats, 0, sizeof(stats));
699 703
700 ret = vfs_statfs(cache->mnt->mnt_root, &stats); 704 ret = vfs_statfs(&path, &stats);
701 if (ret < 0) { 705 if (ret < 0) {
702 if (ret == -EIO) 706 if (ret == -EIO)
703 cachefiles_io_error(cache, "statfs failed"); 707 cachefiles_io_error(cache, "statfs failed");
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index f4a7840bf42c..42c7fafc8bfe 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -37,9 +37,9 @@ void __cachefiles_printk_object(struct cachefiles_object *object,
37 37
38 printk(KERN_ERR "%sobject: OBJ%x\n", 38 printk(KERN_ERR "%sobject: OBJ%x\n",
39 prefix, object->fscache.debug_id); 39 prefix, object->fscache.debug_id);
40 printk(KERN_ERR "%sobjstate=%s fl=%lx swfl=%lx ev=%lx[%lx]\n", 40 printk(KERN_ERR "%sobjstate=%s fl=%lx wbusy=%x ev=%lx[%lx]\n",
41 prefix, fscache_object_states[object->fscache.state], 41 prefix, fscache_object_states[object->fscache.state],
42 object->fscache.flags, object->fscache.work.flags, 42 object->fscache.flags, work_busy(&object->fscache.work),
43 object->fscache.events, 43 object->fscache.events,
44 object->fscache.event_mask & FSCACHE_OBJECT_EVENTS_MASK); 44 object->fscache.event_mask & FSCACHE_OBJECT_EVENTS_MASK);
45 printk(KERN_ERR "%sops=%u inp=%u exc=%u\n", 45 printk(KERN_ERR "%sops=%u inp=%u exc=%u\n",
@@ -212,7 +212,7 @@ wait_for_old_object:
212 212
213 /* if the object we're waiting for is queued for processing, 213 /* if the object we're waiting for is queued for processing,
214 * then just put ourselves on the queue behind it */ 214 * then just put ourselves on the queue behind it */
215 if (slow_work_is_queued(&xobject->fscache.work)) { 215 if (work_pending(&xobject->fscache.work)) {
216 _debug("queue OBJ%x behind OBJ%x immediately", 216 _debug("queue OBJ%x behind OBJ%x immediately",
217 object->fscache.debug_id, 217 object->fscache.debug_id,
218 xobject->fscache.debug_id); 218 xobject->fscache.debug_id);
@@ -220,8 +220,7 @@ wait_for_old_object:
220 } 220 }
221 221
222 /* otherwise we sleep until either the object we're waiting for 222 /* otherwise we sleep until either the object we're waiting for
223 * is done, or the slow-work facility wants the thread back to 223 * is done, or the fscache_object is congested */
224 * do other work */
225 wq = bit_waitqueue(&xobject->flags, CACHEFILES_OBJECT_ACTIVE); 224 wq = bit_waitqueue(&xobject->flags, CACHEFILES_OBJECT_ACTIVE);
226 init_wait(&wait); 225 init_wait(&wait);
227 requeue = false; 226 requeue = false;
@@ -229,8 +228,8 @@ wait_for_old_object:
229 prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE); 228 prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
230 if (!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)) 229 if (!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags))
231 break; 230 break;
232 requeue = slow_work_sleep_till_thread_needed( 231
233 &object->fscache.work, &timeout); 232 requeue = fscache_object_sleep_till_congested(&timeout);
234 } while (timeout > 0 && !requeue); 233 } while (timeout > 0 && !requeue);
235 finish_wait(wq, &wait); 234 finish_wait(wq, &wait);
236 235
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 0f0d41fbb03f..0e3c0924cc3a 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -422,7 +422,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
422 shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; 422 shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
423 423
424 op->op.flags &= FSCACHE_OP_KEEP_FLAGS; 424 op->op.flags &= FSCACHE_OP_KEEP_FLAGS;
425 op->op.flags |= FSCACHE_OP_FAST; 425 op->op.flags |= FSCACHE_OP_ASYNC;
426 op->op.processor = cachefiles_read_copier; 426 op->op.processor = cachefiles_read_copier;
427 427
428 pagevec_init(&pagevec, 0); 428 pagevec_init(&pagevec, 0);
@@ -729,7 +729,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
729 pagevec_init(&pagevec, 0); 729 pagevec_init(&pagevec, 0);
730 730
731 op->op.flags &= FSCACHE_OP_KEEP_FLAGS; 731 op->op.flags &= FSCACHE_OP_KEEP_FLAGS;
732 op->op.flags |= FSCACHE_OP_FAST; 732 op->op.flags |= FSCACHE_OP_ASYNC;
733 op->op.processor = cachefiles_read_copier; 733 op->op.processor = cachefiles_read_copier;
734 734
735 INIT_LIST_HEAD(&backpages); 735 INIT_LIST_HEAD(&backpages);
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index 04b8280582a9..bc87b9c1d27e 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -2,7 +2,7 @@ config CEPH_FS
2 tristate "Ceph distributed file system (EXPERIMENTAL)" 2 tristate "Ceph distributed file system (EXPERIMENTAL)"
3 depends on INET && EXPERIMENTAL 3 depends on INET && EXPERIMENTAL
4 select LIBCRC32C 4 select LIBCRC32C
5 select CONFIG_CRYPTO_AES 5 select CRYPTO_AES
6 help 6 help
7 Choose Y or M here to include support for mounting the 7 Choose Y or M here to include support for mounting the
8 experimental Ceph distributed file system. Ceph is an extremely 8 experimental Ceph distributed file system. Ceph is an extremely
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 74144d6389f0..b81be9a56487 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -627,7 +627,7 @@ retry:
627 if (fmode >= 0) 627 if (fmode >= 0)
628 __ceph_get_fmode(ci, fmode); 628 __ceph_get_fmode(ci, fmode);
629 spin_unlock(&inode->i_lock); 629 spin_unlock(&inode->i_lock);
630 wake_up(&ci->i_cap_wq); 630 wake_up_all(&ci->i_cap_wq);
631 return 0; 631 return 0;
632} 632}
633 633
@@ -1181,7 +1181,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1181 } 1181 }
1182 1182
1183 if (wake) 1183 if (wake)
1184 wake_up(&ci->i_cap_wq); 1184 wake_up_all(&ci->i_cap_wq);
1185 1185
1186 return delayed; 1186 return delayed;
1187} 1187}
@@ -2153,7 +2153,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
2153 else if (flushsnaps) 2153 else if (flushsnaps)
2154 ceph_flush_snaps(ci); 2154 ceph_flush_snaps(ci);
2155 if (wake) 2155 if (wake)
2156 wake_up(&ci->i_cap_wq); 2156 wake_up_all(&ci->i_cap_wq);
2157 if (put) 2157 if (put)
2158 iput(inode); 2158 iput(inode);
2159} 2159}
@@ -2229,7 +2229,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
2229 iput(inode); 2229 iput(inode);
2230 } else if (complete_capsnap) { 2230 } else if (complete_capsnap) {
2231 ceph_flush_snaps(ci); 2231 ceph_flush_snaps(ci);
2232 wake_up(&ci->i_cap_wq); 2232 wake_up_all(&ci->i_cap_wq);
2233 } 2233 }
2234 if (drop_capsnap) 2234 if (drop_capsnap)
2235 iput(inode); 2235 iput(inode);
@@ -2405,7 +2405,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2405 if (queue_invalidate) 2405 if (queue_invalidate)
2406 ceph_queue_invalidate(inode); 2406 ceph_queue_invalidate(inode);
2407 if (wake) 2407 if (wake)
2408 wake_up(&ci->i_cap_wq); 2408 wake_up_all(&ci->i_cap_wq);
2409 2409
2410 if (check_caps == 1) 2410 if (check_caps == 1)
2411 ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY, 2411 ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY,
@@ -2460,7 +2460,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
2460 struct ceph_inode_info, 2460 struct ceph_inode_info,
2461 i_flushing_item)->vfs_inode); 2461 i_flushing_item)->vfs_inode);
2462 mdsc->num_cap_flushing--; 2462 mdsc->num_cap_flushing--;
2463 wake_up(&mdsc->cap_flushing_wq); 2463 wake_up_all(&mdsc->cap_flushing_wq);
2464 dout(" inode %p now !flushing\n", inode); 2464 dout(" inode %p now !flushing\n", inode);
2465 2465
2466 if (ci->i_dirty_caps == 0) { 2466 if (ci->i_dirty_caps == 0) {
@@ -2472,7 +2472,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
2472 } 2472 }
2473 } 2473 }
2474 spin_unlock(&mdsc->cap_dirty_lock); 2474 spin_unlock(&mdsc->cap_dirty_lock);
2475 wake_up(&ci->i_cap_wq); 2475 wake_up_all(&ci->i_cap_wq);
2476 2476
2477out: 2477out:
2478 spin_unlock(&inode->i_lock); 2478 spin_unlock(&inode->i_lock);
@@ -2984,6 +2984,7 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
2984 memcpy(*p, dentry->d_name.name, dentry->d_name.len); 2984 memcpy(*p, dentry->d_name.name, dentry->d_name.len);
2985 *p += dentry->d_name.len; 2985 *p += dentry->d_name.len;
2986 rel->dname_seq = cpu_to_le32(di->lease_seq); 2986 rel->dname_seq = cpu_to_le32(di->lease_seq);
2987 __ceph_mdsc_drop_dentry_lease(dentry);
2987 } 2988 }
2988 spin_unlock(&dentry->d_lock); 2989 spin_unlock(&dentry->d_lock);
2989 return ret; 2990 return ret;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index f85719310db2..f94ed3c7f6a5 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -266,6 +266,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
266 spin_lock(&inode->i_lock); 266 spin_lock(&inode->i_lock);
267 if ((filp->f_pos == 2 || fi->dentry) && 267 if ((filp->f_pos == 2 || fi->dentry) &&
268 !ceph_test_opt(client, NOASYNCREADDIR) && 268 !ceph_test_opt(client, NOASYNCREADDIR) &&
269 ceph_snap(inode) != CEPH_SNAPDIR &&
269 (ci->i_ceph_flags & CEPH_I_COMPLETE) && 270 (ci->i_ceph_flags & CEPH_I_COMPLETE) &&
270 __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { 271 __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
271 err = __dcache_readdir(filp, dirent, filldir); 272 err = __dcache_readdir(filp, dirent, filldir);
@@ -1013,18 +1014,22 @@ out_touch:
1013 1014
1014/* 1015/*
1015 * When a dentry is released, clear the dir I_COMPLETE if it was part 1016 * When a dentry is released, clear the dir I_COMPLETE if it was part
1016 * of the current dir gen. 1017 * of the current dir gen or if this is in the snapshot namespace.
1017 */ 1018 */
1018static void ceph_dentry_release(struct dentry *dentry) 1019static void ceph_dentry_release(struct dentry *dentry)
1019{ 1020{
1020 struct ceph_dentry_info *di = ceph_dentry(dentry); 1021 struct ceph_dentry_info *di = ceph_dentry(dentry);
1021 struct inode *parent_inode = dentry->d_parent->d_inode; 1022 struct inode *parent_inode = dentry->d_parent->d_inode;
1023 u64 snapid = ceph_snap(parent_inode);
1022 1024
1023 if (parent_inode) { 1025 dout("dentry_release %p parent %p\n", dentry, parent_inode);
1026
1027 if (parent_inode && snapid != CEPH_SNAPDIR) {
1024 struct ceph_inode_info *ci = ceph_inode(parent_inode); 1028 struct ceph_inode_info *ci = ceph_inode(parent_inode);
1025 1029
1026 spin_lock(&parent_inode->i_lock); 1030 spin_lock(&parent_inode->i_lock);
1027 if (ci->i_shared_gen == di->lease_shared_gen) { 1031 if (ci->i_shared_gen == di->lease_shared_gen ||
1032 snapid <= CEPH_MAXSNAP) {
1028 dout(" clearing %p complete (d_release)\n", 1033 dout(" clearing %p complete (d_release)\n",
1029 parent_inode); 1034 parent_inode);
1030 ci->i_ceph_flags &= ~CEPH_I_COMPLETE; 1035 ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
@@ -1241,7 +1246,9 @@ struct dentry_operations ceph_dentry_ops = {
1241 1246
1242struct dentry_operations ceph_snapdir_dentry_ops = { 1247struct dentry_operations ceph_snapdir_dentry_ops = {
1243 .d_revalidate = ceph_snapdir_d_revalidate, 1248 .d_revalidate = ceph_snapdir_d_revalidate,
1249 .d_release = ceph_dentry_release,
1244}; 1250};
1245 1251
1246struct dentry_operations ceph_snap_dentry_ops = { 1252struct dentry_operations ceph_snap_dentry_ops = {
1253 .d_release = ceph_dentry_release,
1247}; 1254};
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 6251a1574b94..7c08698fad3e 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -265,7 +265,7 @@ int ceph_release(struct inode *inode, struct file *file)
265 kmem_cache_free(ceph_file_cachep, cf); 265 kmem_cache_free(ceph_file_cachep, cf);
266 266
267 /* wake up anyone waiting for caps on this inode */ 267 /* wake up anyone waiting for caps on this inode */
268 wake_up(&ci->i_cap_wq); 268 wake_up_all(&ci->i_cap_wq);
269 return 0; 269 return 0;
270} 270}
271 271
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 8f9b9fe8ef9f..389f9dbd9949 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1199,8 +1199,10 @@ retry_lookup:
1199 goto out; 1199 goto out;
1200 } 1200 }
1201 err = ceph_init_dentry(dn); 1201 err = ceph_init_dentry(dn);
1202 if (err < 0) 1202 if (err < 0) {
1203 dput(dn);
1203 goto out; 1204 goto out;
1205 }
1204 } else if (dn->d_inode && 1206 } else if (dn->d_inode &&
1205 (ceph_ino(dn->d_inode) != vino.ino || 1207 (ceph_ino(dn->d_inode) != vino.ino ||
1206 ceph_snap(dn->d_inode) != vino.snap)) { 1208 ceph_snap(dn->d_inode) != vino.snap)) {
@@ -1499,7 +1501,7 @@ retry:
1499 if (wrbuffer_refs == 0) 1501 if (wrbuffer_refs == 0)
1500 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); 1502 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
1501 if (wake) 1503 if (wake)
1502 wake_up(&ci->i_cap_wq); 1504 wake_up_all(&ci->i_cap_wq);
1503} 1505}
1504 1506
1505 1507
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 416c08d315db..dd440bd438a9 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -868,7 +868,7 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
868{ 868{
869 struct ceph_inode_info *ci = ceph_inode(inode); 869 struct ceph_inode_info *ci = ceph_inode(inode);
870 870
871 wake_up(&ci->i_cap_wq); 871 wake_up_all(&ci->i_cap_wq);
872 if (arg) { 872 if (arg) {
873 spin_lock(&inode->i_lock); 873 spin_lock(&inode->i_lock);
874 ci->i_wanted_max_size = 0; 874 ci->i_wanted_max_size = 0;
@@ -1564,7 +1564,7 @@ static void complete_request(struct ceph_mds_client *mdsc,
1564 if (req->r_callback) 1564 if (req->r_callback)
1565 req->r_callback(mdsc, req); 1565 req->r_callback(mdsc, req);
1566 else 1566 else
1567 complete(&req->r_completion); 1567 complete_all(&req->r_completion);
1568} 1568}
1569 1569
1570/* 1570/*
@@ -1932,7 +1932,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
1932 if (head->safe) { 1932 if (head->safe) {
1933 req->r_got_safe = true; 1933 req->r_got_safe = true;
1934 __unregister_request(mdsc, req); 1934 __unregister_request(mdsc, req);
1935 complete(&req->r_safe_completion); 1935 complete_all(&req->r_safe_completion);
1936 1936
1937 if (req->r_got_unsafe) { 1937 if (req->r_got_unsafe) {
1938 /* 1938 /*
@@ -1947,7 +1947,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
1947 1947
1948 /* last unsafe request during umount? */ 1948 /* last unsafe request during umount? */
1949 if (mdsc->stopping && !__get_oldest_req(mdsc)) 1949 if (mdsc->stopping && !__get_oldest_req(mdsc))
1950 complete(&mdsc->safe_umount_waiters); 1950 complete_all(&mdsc->safe_umount_waiters);
1951 mutex_unlock(&mdsc->mutex); 1951 mutex_unlock(&mdsc->mutex);
1952 goto out; 1952 goto out;
1953 } 1953 }
@@ -2126,7 +2126,7 @@ static void handle_session(struct ceph_mds_session *session,
2126 pr_info("mds%d reconnect denied\n", session->s_mds); 2126 pr_info("mds%d reconnect denied\n", session->s_mds);
2127 remove_session_caps(session); 2127 remove_session_caps(session);
2128 wake = 1; /* for good measure */ 2128 wake = 1; /* for good measure */
2129 complete(&mdsc->session_close_waiters); 2129 complete_all(&mdsc->session_close_waiters);
2130 kick_requests(mdsc, mds); 2130 kick_requests(mdsc, mds);
2131 break; 2131 break;
2132 2132
diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c
index cc115eafae11..54fe01c50706 100644
--- a/fs/ceph/mon_client.c
+++ b/fs/ceph/mon_client.c
@@ -345,7 +345,7 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
345 345
346out: 346out:
347 mutex_unlock(&monc->mutex); 347 mutex_unlock(&monc->mutex);
348 wake_up(&client->auth_wq); 348 wake_up_all(&client->auth_wq);
349} 349}
350 350
351/* 351/*
@@ -462,7 +462,7 @@ static void handle_statfs_reply(struct ceph_mon_client *monc,
462 } 462 }
463 mutex_unlock(&monc->mutex); 463 mutex_unlock(&monc->mutex);
464 if (req) { 464 if (req) {
465 complete(&req->completion); 465 complete_all(&req->completion);
466 put_generic_request(req); 466 put_generic_request(req);
467 } 467 }
468 return; 468 return;
@@ -718,7 +718,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
718 monc->m_auth->front_max); 718 monc->m_auth->front_max);
719 if (ret < 0) { 719 if (ret < 0) {
720 monc->client->auth_err = ret; 720 monc->client->auth_err = ret;
721 wake_up(&monc->client->auth_wq); 721 wake_up_all(&monc->client->auth_wq);
722 } else if (ret > 0) { 722 } else if (ret > 0) {
723 __send_prepared_auth_request(monc, ret); 723 __send_prepared_auth_request(monc, ret);
724 } else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) { 724 } else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) {
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index 92b7251a53f1..e38522347898 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -862,12 +862,12 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
862 if (req->r_callback) 862 if (req->r_callback)
863 req->r_callback(req, msg); 863 req->r_callback(req, msg);
864 else 864 else
865 complete(&req->r_completion); 865 complete_all(&req->r_completion);
866 866
867 if (flags & CEPH_OSD_FLAG_ONDISK) { 867 if (flags & CEPH_OSD_FLAG_ONDISK) {
868 if (req->r_safe_callback) 868 if (req->r_safe_callback)
869 req->r_safe_callback(req, msg); 869 req->r_safe_callback(req, msg);
870 complete(&req->r_safe_completion); /* fsync waiter */ 870 complete_all(&req->r_safe_completion); /* fsync waiter */
871 } 871 }
872 872
873done: 873done:
@@ -1083,7 +1083,7 @@ done:
1083 if (newmap) 1083 if (newmap)
1084 kick_requests(osdc, NULL); 1084 kick_requests(osdc, NULL);
1085 up_read(&osdc->map_sem); 1085 up_read(&osdc->map_sem);
1086 wake_up(&osdc->client->auth_wq); 1086 wake_up_all(&osdc->client->auth_wq);
1087 return; 1087 return;
1088 1088
1089bad: 1089bad:
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index 277f8b339577..416d46adbf87 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -831,12 +831,13 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
831 /* remove any? */ 831 /* remove any? */
832 while (rbp && pgid_cmp(rb_entry(rbp, struct ceph_pg_mapping, 832 while (rbp && pgid_cmp(rb_entry(rbp, struct ceph_pg_mapping,
833 node)->pgid, pgid) <= 0) { 833 node)->pgid, pgid) <= 0) {
834 struct rb_node *cur = rbp; 834 struct ceph_pg_mapping *cur =
835 rb_entry(rbp, struct ceph_pg_mapping, node);
836
835 rbp = rb_next(rbp); 837 rbp = rb_next(rbp);
836 dout(" removed pg_temp %llx\n", 838 dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid);
837 *(u64 *)&rb_entry(cur, struct ceph_pg_mapping, 839 rb_erase(&cur->node, &map->pg_temp);
838 node)->pgid); 840 kfree(cur);
839 rb_erase(cur, &map->pg_temp);
840 } 841 }
841 842
842 if (pglen) { 843 if (pglen) {
@@ -852,19 +853,22 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
852 for (j = 0; j < pglen; j++) 853 for (j = 0; j < pglen; j++)
853 pg->osds[j] = ceph_decode_32(p); 854 pg->osds[j] = ceph_decode_32(p);
854 err = __insert_pg_mapping(pg, &map->pg_temp); 855 err = __insert_pg_mapping(pg, &map->pg_temp);
855 if (err) 856 if (err) {
857 kfree(pg);
856 goto bad; 858 goto bad;
859 }
857 dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid, 860 dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid,
858 pglen); 861 pglen);
859 } 862 }
860 } 863 }
861 while (rbp) { 864 while (rbp) {
862 struct rb_node *cur = rbp; 865 struct ceph_pg_mapping *cur =
866 rb_entry(rbp, struct ceph_pg_mapping, node);
867
863 rbp = rb_next(rbp); 868 rbp = rb_next(rbp);
864 dout(" removed pg_temp %llx\n", 869 dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid);
865 *(u64 *)&rb_entry(cur, struct ceph_pg_mapping, 870 rb_erase(&cur->node, &map->pg_temp);
866 node)->pgid); 871 kfree(cur);
867 rb_erase(cur, &map->pg_temp);
868 } 872 }
869 873
870 /* ignore the rest */ 874 /* ignore the rest */
diff --git a/fs/char_dev.c b/fs/char_dev.c
index d6db933df2b2..f80a4f25123c 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -20,6 +20,7 @@
20#include <linux/cdev.h> 20#include <linux/cdev.h>
21#include <linux/mutex.h> 21#include <linux/mutex.h>
22#include <linux/backing-dev.h> 22#include <linux/backing-dev.h>
23#include <linux/tty.h>
23 24
24#include "internal.h" 25#include "internal.h"
25 26
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 80f352596807..917b7d449bb2 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -2,7 +2,6 @@ config CIFS
2 tristate "CIFS support (advanced network filesystem, SMBFS successor)" 2 tristate "CIFS support (advanced network filesystem, SMBFS successor)"
3 depends on INET 3 depends on INET
4 select NLS 4 select NLS
5 select SLOW_WORK
6 help 5 help
7 This is the client VFS module for the Common Internet File System 6 This is the client VFS module for the Common Internet File System
8 (CIFS) protocol which is the successor to the Server Message Block 7 (CIFS) protocol which is the successor to the Server Message Block
@@ -71,14 +70,14 @@ config CIFS_WEAK_PW_HASH
71 If unsure, say N. 70 If unsure, say N.
72 71
73config CIFS_UPCALL 72config CIFS_UPCALL
74 bool "Kerberos/SPNEGO advanced session setup" 73 bool "Kerberos/SPNEGO advanced session setup"
75 depends on CIFS && KEYS 74 depends on CIFS && KEYS
76 help 75 select DNS_RESOLVER
77 Enables an upcall mechanism for CIFS which accesses 76 help
78 userspace helper utilities to provide SPNEGO packaged (RFC 4178) 77 Enables an upcall mechanism for CIFS which accesses userspace helper
79 Kerberos tickets which are needed to mount to certain secure servers 78 utilities to provide SPNEGO packaged (RFC 4178) Kerberos tickets
80 (for which more secure Kerberos authentication is required). If 79 which are needed to mount to certain secure servers (for which more
81 unsure, say N. 80 secure Kerberos authentication is required). If unsure, say N.
82 81
83config CIFS_XATTR 82config CIFS_XATTR
84 bool "CIFS extended attributes" 83 bool "CIFS extended attributes"
@@ -122,6 +121,7 @@ config CIFS_DEBUG2
122config CIFS_DFS_UPCALL 121config CIFS_DFS_UPCALL
123 bool "DFS feature support" 122 bool "DFS feature support"
124 depends on CIFS && KEYS 123 depends on CIFS && KEYS
124 select DNS_RESOLVER
125 help 125 help
126 Distributed File System (DFS) support is used to access shares 126 Distributed File System (DFS) support is used to access shares
127 transparently in an enterprise name space, even if the share 127 transparently in an enterprise name space, even if the share
@@ -131,6 +131,15 @@ config CIFS_DFS_UPCALL
131 IP addresses) which is needed for implicit mounts of DFS junction 131 IP addresses) which is needed for implicit mounts of DFS junction
132 points. If unsure, say N. 132 points. If unsure, say N.
133 133
134config CIFS_FSCACHE
135 bool "Provide CIFS client caching support (EXPERIMENTAL)"
136 depends on EXPERIMENTAL
137 depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y
138 help
139 Makes CIFS FS-Cache capable. Say Y here if you want your CIFS data
140 to be cached locally on disk through the general filesystem cache
141 manager. If unsure, say N.
142
134config CIFS_EXPERIMENTAL 143config CIFS_EXPERIMENTAL
135 bool "CIFS Experimental Features (EXPERIMENTAL)" 144 bool "CIFS Experimental Features (EXPERIMENTAL)"
136 depends on CIFS && EXPERIMENTAL 145 depends on CIFS && EXPERIMENTAL
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 9948c0030e86..adefa60a9bdc 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -11,3 +11,5 @@ cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
11cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o 11cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o
12 12
13cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o 13cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o
14
15cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o
diff --git a/fs/cifs/README b/fs/cifs/README
index a727b7cb075f..a7081eeeb85d 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -568,8 +568,9 @@ module can be displayed via modinfo.
568Misc /proc/fs/cifs Flags and Debug Info 568Misc /proc/fs/cifs Flags and Debug Info
569======================================= 569=======================================
570Informational pseudo-files: 570Informational pseudo-files:
571DebugData Displays information about active CIFS sessions 571DebugData Displays information about active CIFS sessions and
572 and shares, as well as the cifs.ko version. 572 shares, features enabled as well as the cifs.ko
573 version.
573Stats Lists summary resource usage information as well as per 574Stats Lists summary resource usage information as well as per
574 share statistics, if CONFIG_CIFS_STATS is enabled 575 share statistics, if CONFIG_CIFS_STATS is enabled
575 in the kernel configuration. 576 in the kernel configuration.
diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c
new file mode 100644
index 000000000000..224d7bbd1fcc
--- /dev/null
+++ b/fs/cifs/cache.c
@@ -0,0 +1,331 @@
1/*
2 * fs/cifs/cache.c - CIFS filesystem cache index structure definitions
3 *
4 * Copyright (c) 2010 Novell, Inc.
5 * Author(s): Suresh Jayaraman <sjayaraman@suse.de>
6 *
7 * This library is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published
9 * by the Free Software Foundation; either version 2.1 of the License, or
10 * (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
15 * the GNU Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this library; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21#include "fscache.h"
22#include "cifs_debug.h"
23
24/*
25 * CIFS filesystem definition for FS-Cache
26 */
27struct fscache_netfs cifs_fscache_netfs = {
28 .name = "cifs",
29 .version = 0,
30};
31
32/*
33 * Register CIFS for caching with FS-Cache
34 */
35int cifs_fscache_register(void)
36{
37 return fscache_register_netfs(&cifs_fscache_netfs);
38}
39
40/*
41 * Unregister CIFS for caching
42 */
43void cifs_fscache_unregister(void)
44{
45 fscache_unregister_netfs(&cifs_fscache_netfs);
46}
47
/*
 * Key layout of CIFS server cache index object
 *
 * A fixed header (family + port) is followed by exactly one address whose
 * size depends on the address family.  The address is therefore declared
 * as a C99 flexible array member: it adds nothing to sizeof(), and users
 * add the size of the address variant they actually store.
 */
struct cifs_server_key {
	uint16_t	family;		/* address family */
	uint16_t	port;		/* IP port */
	union {
		struct in_addr	ipv4_addr;
		struct in6_addr	ipv6_addr;
	} addr[];
};
59
60/*
61 * Server object keyed by {IPaddress,port,family} tuple
62 */
63static uint16_t cifs_server_get_key(const void *cookie_netfs_data,
64 void *buffer, uint16_t maxbuf)
65{
66 const struct TCP_Server_Info *server = cookie_netfs_data;
67 const struct sockaddr *sa = (struct sockaddr *) &server->addr.sockAddr;
68 struct cifs_server_key *key = buffer;
69 uint16_t key_len = sizeof(struct cifs_server_key);
70
71 memset(key, 0, key_len);
72
73 /*
74 * Should not be a problem as sin_family/sin6_family overlays
75 * sa_family field
76 */
77 switch (sa->sa_family) {
78 case AF_INET:
79 key->family = server->addr.sockAddr.sin_family;
80 key->port = server->addr.sockAddr.sin_port;
81 key->addr[0].ipv4_addr = server->addr.sockAddr.sin_addr;
82 key_len += sizeof(key->addr[0].ipv4_addr);
83 break;
84
85 case AF_INET6:
86 key->family = server->addr.sockAddr6.sin6_family;
87 key->port = server->addr.sockAddr6.sin6_port;
88 key->addr[0].ipv6_addr = server->addr.sockAddr6.sin6_addr;
89 key_len += sizeof(key->addr[0].ipv6_addr);
90 break;
91
92 default:
93 cERROR(1, "CIFS: Unknown network family '%d'", sa->sa_family);
94 key_len = 0;
95 break;
96 }
97
98 return key_len;
99}
100
101/*
102 * Server object for FS-Cache
103 */
104const struct fscache_cookie_def cifs_fscache_server_index_def = {
105 .name = "CIFS.server",
106 .type = FSCACHE_COOKIE_TYPE_INDEX,
107 .get_key = cifs_server_get_key,
108};
109
/*
 * Auxiliary data stored in the cache alongside a CIFS superblock (share)
 * index object.
 */
struct cifs_fscache_super_auxdata {
	uint64_t	resource_id;	/* unique server resource id */
};
116
117static char *extract_sharename(const char *treename)
118{
119 const char *src;
120 char *delim, *dst;
121 int len;
122
123 /* skip double chars at the beginning */
124 src = treename + 2;
125
126 /* share name is always preceded by '\\' now */
127 delim = strchr(src, '\\');
128 if (!delim)
129 return ERR_PTR(-EINVAL);
130 delim++;
131 len = strlen(delim);
132
133 /* caller has to free the memory */
134 dst = kstrndup(delim, len, GFP_KERNEL);
135 if (!dst)
136 return ERR_PTR(-ENOMEM);
137
138 return dst;
139}
140
141/*
142 * Superblock object currently keyed by share name
143 */
144static uint16_t cifs_super_get_key(const void *cookie_netfs_data, void *buffer,
145 uint16_t maxbuf)
146{
147 const struct cifsTconInfo *tcon = cookie_netfs_data;
148 char *sharename;
149 uint16_t len;
150
151 sharename = extract_sharename(tcon->treeName);
152 if (IS_ERR(sharename)) {
153 cFYI(1, "CIFS: couldn't extract sharename\n");
154 sharename = NULL;
155 return 0;
156 }
157
158 len = strlen(sharename);
159 if (len > maxbuf)
160 return 0;
161
162 memcpy(buffer, sharename, len);
163
164 kfree(sharename);
165
166 return len;
167}
168
169static uint16_t
170cifs_fscache_super_get_aux(const void *cookie_netfs_data, void *buffer,
171 uint16_t maxbuf)
172{
173 struct cifs_fscache_super_auxdata auxdata;
174 const struct cifsTconInfo *tcon = cookie_netfs_data;
175
176 memset(&auxdata, 0, sizeof(auxdata));
177 auxdata.resource_id = tcon->resource_id;
178
179 if (maxbuf > sizeof(auxdata))
180 maxbuf = sizeof(auxdata);
181
182 memcpy(buffer, &auxdata, maxbuf);
183
184 return maxbuf;
185}
186
187static enum
188fscache_checkaux cifs_fscache_super_check_aux(void *cookie_netfs_data,
189 const void *data,
190 uint16_t datalen)
191{
192 struct cifs_fscache_super_auxdata auxdata;
193 const struct cifsTconInfo *tcon = cookie_netfs_data;
194
195 if (datalen != sizeof(auxdata))
196 return FSCACHE_CHECKAUX_OBSOLETE;
197
198 memset(&auxdata, 0, sizeof(auxdata));
199 auxdata.resource_id = tcon->resource_id;
200
201 if (memcmp(data, &auxdata, datalen) != 0)
202 return FSCACHE_CHECKAUX_OBSOLETE;
203
204 return FSCACHE_CHECKAUX_OKAY;
205}
206
207/*
208 * Superblock object for FS-Cache
209 */
210const struct fscache_cookie_def cifs_fscache_super_index_def = {
211 .name = "CIFS.super",
212 .type = FSCACHE_COOKIE_TYPE_INDEX,
213 .get_key = cifs_super_get_key,
214 .get_aux = cifs_fscache_super_get_aux,
215 .check_aux = cifs_fscache_super_check_aux,
216};
217
218/*
219 * Auxiliary data attached to CIFS inode within the cache
220 */
221struct cifs_fscache_inode_auxdata {
222 struct timespec last_write_time;
223 struct timespec last_change_time;
224 u64 eof;
225};
226
227static uint16_t cifs_fscache_inode_get_key(const void *cookie_netfs_data,
228 void *buffer, uint16_t maxbuf)
229{
230 const struct cifsInodeInfo *cifsi = cookie_netfs_data;
231 uint16_t keylen;
232
233 /* use the UniqueId as the key */
234 keylen = sizeof(cifsi->uniqueid);
235 if (keylen > maxbuf)
236 keylen = 0;
237 else
238 memcpy(buffer, &cifsi->uniqueid, keylen);
239
240 return keylen;
241}
242
243static void
244cifs_fscache_inode_get_attr(const void *cookie_netfs_data, uint64_t *size)
245{
246 const struct cifsInodeInfo *cifsi = cookie_netfs_data;
247
248 *size = cifsi->vfs_inode.i_size;
249}
250
251static uint16_t
252cifs_fscache_inode_get_aux(const void *cookie_netfs_data, void *buffer,
253 uint16_t maxbuf)
254{
255 struct cifs_fscache_inode_auxdata auxdata;
256 const struct cifsInodeInfo *cifsi = cookie_netfs_data;
257
258 memset(&auxdata, 0, sizeof(auxdata));
259 auxdata.eof = cifsi->server_eof;
260 auxdata.last_write_time = cifsi->vfs_inode.i_mtime;
261 auxdata.last_change_time = cifsi->vfs_inode.i_ctime;
262
263 if (maxbuf > sizeof(auxdata))
264 maxbuf = sizeof(auxdata);
265
266 memcpy(buffer, &auxdata, maxbuf);
267
268 return maxbuf;
269}
270
271static enum
272fscache_checkaux cifs_fscache_inode_check_aux(void *cookie_netfs_data,
273 const void *data,
274 uint16_t datalen)
275{
276 struct cifs_fscache_inode_auxdata auxdata;
277 struct cifsInodeInfo *cifsi = cookie_netfs_data;
278
279 if (datalen != sizeof(auxdata))
280 return FSCACHE_CHECKAUX_OBSOLETE;
281
282 memset(&auxdata, 0, sizeof(auxdata));
283 auxdata.eof = cifsi->server_eof;
284 auxdata.last_write_time = cifsi->vfs_inode.i_mtime;
285 auxdata.last_change_time = cifsi->vfs_inode.i_ctime;
286
287 if (memcmp(data, &auxdata, datalen) != 0)
288 return FSCACHE_CHECKAUX_OBSOLETE;
289
290 return FSCACHE_CHECKAUX_OKAY;
291}
292
293static void cifs_fscache_inode_now_uncached(void *cookie_netfs_data)
294{
295 struct cifsInodeInfo *cifsi = cookie_netfs_data;
296 struct pagevec pvec;
297 pgoff_t first;
298 int loop, nr_pages;
299
300 pagevec_init(&pvec, 0);
301 first = 0;
302
303 cFYI(1, "cifs inode 0x%p now uncached", cifsi);
304
305 for (;;) {
306 nr_pages = pagevec_lookup(&pvec,
307 cifsi->vfs_inode.i_mapping, first,
308 PAGEVEC_SIZE - pagevec_count(&pvec));
309 if (!nr_pages)
310 break;
311
312 for (loop = 0; loop < nr_pages; loop++)
313 ClearPageFsCache(pvec.pages[loop]);
314
315 first = pvec.pages[nr_pages - 1]->index + 1;
316
317 pvec.nr = nr_pages;
318 pagevec_release(&pvec);
319 cond_resched();
320 }
321}
322
323const struct fscache_cookie_def cifs_fscache_inode_object_def = {
324 .name = "CIFS.uniqueid",
325 .type = FSCACHE_COOKIE_TYPE_DATAFILE,
326 .get_key = cifs_fscache_inode_get_key,
327 .get_attr = cifs_fscache_inode_get_attr,
328 .get_aux = cifs_fscache_inode_get_aux,
329 .check_aux = cifs_fscache_inode_check_aux,
330 .now_uncached = cifs_fscache_inode_now_uncached,
331};
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 4fce6e61b34e..eb1ba493489f 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -119,6 +119,31 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
119 "Display Internal CIFS Data Structures for Debugging\n" 119 "Display Internal CIFS Data Structures for Debugging\n"
120 "---------------------------------------------------\n"); 120 "---------------------------------------------------\n");
121 seq_printf(m, "CIFS Version %s\n", CIFS_VERSION); 121 seq_printf(m, "CIFS Version %s\n", CIFS_VERSION);
122 seq_printf(m, "Features: ");
123#ifdef CONFIG_CIFS_DFS_UPCALL
124 seq_printf(m, "dfs");
125 seq_putc(m, ' ');
126#endif
127#ifdef CONFIG_CIFS_FSCACHE
128 seq_printf(m, "fscache");
129 seq_putc(m, ' ');
130#endif
131#ifdef CONFIG_CIFS_WEAK_PW_HASH
132 seq_printf(m, "lanman");
133 seq_putc(m, ' ');
134#endif
135#ifdef CONFIG_CIFS_POSIX
136 seq_printf(m, "posix");
137 seq_putc(m, ' ');
138#endif
139#ifdef CONFIG_CIFS_UPCALL
140 seq_printf(m, "spnego");
141 seq_putc(m, ' ');
142#endif
143#ifdef CONFIG_CIFS_XATTR
144 seq_printf(m, "xattr");
145#endif
146 seq_putc(m, '\n');
122 seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid); 147 seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid);
123 seq_printf(m, "Servers:"); 148 seq_printf(m, "Servers:");
124 149
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index ac19a6f3dae0..d6ced7aa23cf 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -141,7 +141,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
141 } 141 }
142 142
143 rc = dns_resolve_server_name_to_ip(*devname, &srvIP); 143 rc = dns_resolve_server_name_to_ip(*devname, &srvIP);
144 if (rc != 0) { 144 if (rc < 0) {
145 cERROR(1, "%s: Failed to resolve server part of %s to IP: %d", 145 cERROR(1, "%s: Failed to resolve server part of %s to IP: %d",
146 __func__, *devname, rc); 146 __func__, *devname, rc);
147 goto compose_mount_options_err; 147 goto compose_mount_options_err;
@@ -150,8 +150,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
150 * assuming that we have 'unc=' and 'ip=' in 150 * assuming that we have 'unc=' and 'ip=' in
151 * the original sb_mountdata 151 * the original sb_mountdata
152 */ 152 */
153 md_len = strlen(sb_mountdata) + strlen(srvIP) + 153 md_len = strlen(sb_mountdata) + rc + strlen(ref->node_name) + 12;
154 strlen(ref->node_name) + 12;
155 mountdata = kzalloc(md_len+1, GFP_KERNEL); 154 mountdata = kzalloc(md_len+1, GFP_KERNEL);
156 if (mountdata == NULL) { 155 if (mountdata == NULL) {
157 rc = -ENOMEM; 156 rc = -ENOMEM;
@@ -230,28 +229,22 @@ compose_mount_options_err:
230 goto compose_mount_options_out; 229 goto compose_mount_options_out;
231} 230}
232 231
233 232/**
234static struct vfsmount *cifs_dfs_do_refmount(const struct vfsmount *mnt_parent, 233 * cifs_dfs_do_refmount - mounts specified path using provided referral
235 struct dentry *dentry, const struct dfs_info3_param *ref) 234 * @cifs_sb: parent/root superblock
235 * @fullpath: full path in UNC format
236 * @ref: server's referral
237 */
238static struct vfsmount *cifs_dfs_do_refmount(struct cifs_sb_info *cifs_sb,
239 const char *fullpath, const struct dfs_info3_param *ref)
236{ 240{
237 struct cifs_sb_info *cifs_sb;
238 struct vfsmount *mnt; 241 struct vfsmount *mnt;
239 char *mountdata; 242 char *mountdata;
240 char *devname = NULL; 243 char *devname = NULL;
241 char *fullpath;
242
243 cifs_sb = CIFS_SB(dentry->d_inode->i_sb);
244 /*
245 * this function gives us a path with a double backslash prefix. We
246 * require a single backslash for DFS.
247 */
248 fullpath = build_path_from_dentry(dentry);
249 if (!fullpath)
250 return ERR_PTR(-ENOMEM);
251 244
245 /* strip first '\' from fullpath */
252 mountdata = cifs_compose_mount_options(cifs_sb->mountdata, 246 mountdata = cifs_compose_mount_options(cifs_sb->mountdata,
253 fullpath + 1, ref, &devname); 247 fullpath + 1, ref, &devname);
254 kfree(fullpath);
255 248
256 if (IS_ERR(mountdata)) 249 if (IS_ERR(mountdata))
257 return (struct vfsmount *)mountdata; 250 return (struct vfsmount *)mountdata;
@@ -357,8 +350,8 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
357 rc = -EINVAL; 350 rc = -EINVAL;
358 goto out_err; 351 goto out_err;
359 } 352 }
360 mnt = cifs_dfs_do_refmount(nd->path.mnt, 353 mnt = cifs_dfs_do_refmount(cifs_sb,
361 nd->path.dentry, referrals + i); 354 full_path, referrals + i);
362 cFYI(1, "%s: cifs_dfs_do_refmount:%s , mnt:%p", __func__, 355 cFYI(1, "%s: cifs_dfs_do_refmount:%s , mnt:%p", __func__,
363 referrals[i].node_name, mnt); 356 referrals[i].node_name, mnt);
364 357
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 246a167cb913..9e771450c3b8 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -35,6 +35,7 @@
35#define CIFS_MOUNT_DYNPERM 0x1000 /* allow in-memory only mode setting */ 35#define CIFS_MOUNT_DYNPERM 0x1000 /* allow in-memory only mode setting */
36#define CIFS_MOUNT_NOPOSIXBRL 0x2000 /* mandatory not posix byte range lock */ 36#define CIFS_MOUNT_NOPOSIXBRL 0x2000 /* mandatory not posix byte range lock */
37#define CIFS_MOUNT_NOSSYNC 0x4000 /* don't do slow SMBflush on every sync*/ 37#define CIFS_MOUNT_NOSSYNC 0x4000 /* don't do slow SMBflush on every sync*/
38#define CIFS_MOUNT_FSCACHE 0x8000 /* local caching enabled */
38 39
39struct cifs_sb_info { 40struct cifs_sb_info {
40 struct cifsTconInfo *tcon; /* primary mount */ 41 struct cifsTconInfo *tcon; /* primary mount */
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 379bd7d9c05f..87044906cd1f 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -84,6 +84,9 @@ struct key_type cifs_spnego_key_type = {
84/* strlen of ";uid=0x" */ 84/* strlen of ";uid=0x" */
85#define UID_KEY_LEN 7 85#define UID_KEY_LEN 7
86 86
87/* strlen of ";creduid=0x" */
88#define CREDUID_KEY_LEN 11
89
87/* strlen of ";user=" */ 90/* strlen of ";user=" */
88#define USER_KEY_LEN 6 91#define USER_KEY_LEN 6
89 92
@@ -107,6 +110,7 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
107 IP_KEY_LEN + INET6_ADDRSTRLEN + 110 IP_KEY_LEN + INET6_ADDRSTRLEN +
108 MAX_MECH_STR_LEN + 111 MAX_MECH_STR_LEN +
109 UID_KEY_LEN + (sizeof(uid_t) * 2) + 112 UID_KEY_LEN + (sizeof(uid_t) * 2) +
113 CREDUID_KEY_LEN + (sizeof(uid_t) * 2) +
110 USER_KEY_LEN + strlen(sesInfo->userName) + 114 USER_KEY_LEN + strlen(sesInfo->userName) +
111 PID_KEY_LEN + (sizeof(pid_t) * 2) + 1; 115 PID_KEY_LEN + (sizeof(pid_t) * 2) + 1;
112 116
@@ -144,6 +148,9 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
144 sprintf(dp, ";uid=0x%x", sesInfo->linux_uid); 148 sprintf(dp, ";uid=0x%x", sesInfo->linux_uid);
145 149
146 dp = description + strlen(description); 150 dp = description + strlen(description);
151 sprintf(dp, ";creduid=0x%x", sesInfo->cred_uid);
152
153 dp = description + strlen(description);
147 sprintf(dp, ";user=%s", sesInfo->userName); 154 sprintf(dp, ";user=%s", sesInfo->userName);
148 155
149 dp = description + strlen(description); 156 dp = description + strlen(description);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 2cb1a70214d7..b7431afdd76d 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -45,8 +45,8 @@
45#include "cifs_fs_sb.h" 45#include "cifs_fs_sb.h"
46#include <linux/mm.h> 46#include <linux/mm.h>
47#include <linux/key-type.h> 47#include <linux/key-type.h>
48#include "dns_resolve.h"
49#include "cifs_spnego.h" 48#include "cifs_spnego.h"
49#include "fscache.h"
50#define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */ 50#define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */
51 51
52int cifsFYI = 0; 52int cifsFYI = 0;
@@ -329,6 +329,14 @@ cifs_destroy_inode(struct inode *inode)
329} 329}
330 330
331static void 331static void
332cifs_evict_inode(struct inode *inode)
333{
334 truncate_inode_pages(&inode->i_data, 0);
335 end_writeback(inode);
336 cifs_fscache_release_inode_cookie(inode);
337}
338
339static void
332cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server) 340cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server)
333{ 341{
334 seq_printf(s, ",addr="); 342 seq_printf(s, ",addr=");
@@ -473,14 +481,13 @@ static int cifs_remount(struct super_block *sb, int *flags, char *data)
473 return 0; 481 return 0;
474} 482}
475 483
476void cifs_drop_inode(struct inode *inode) 484static int cifs_drop_inode(struct inode *inode)
477{ 485{
478 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 486 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
479 487
480 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) 488 /* no serverino => unconditional eviction */
481 return generic_drop_inode(inode); 489 return !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) ||
482 490 generic_drop_inode(inode);
483 return generic_delete_inode(inode);
484} 491}
485 492
486static const struct super_operations cifs_super_ops = { 493static const struct super_operations cifs_super_ops = {
@@ -489,6 +496,7 @@ static const struct super_operations cifs_super_ops = {
489 .alloc_inode = cifs_alloc_inode, 496 .alloc_inode = cifs_alloc_inode,
490 .destroy_inode = cifs_destroy_inode, 497 .destroy_inode = cifs_destroy_inode,
491 .drop_inode = cifs_drop_inode, 498 .drop_inode = cifs_drop_inode,
499 .evict_inode = cifs_evict_inode,
492/* .delete_inode = cifs_delete_inode, */ /* Do not need above 500/* .delete_inode = cifs_delete_inode, */ /* Do not need above
493 function unless later we add lazy close of inodes or unless the 501 function unless later we add lazy close of inodes or unless the
494 kernel forgets to call us with the same number of releases (closes) 502 kernel forgets to call us with the same number of releases (closes)
@@ -902,6 +910,10 @@ init_cifs(void)
902 cFYI(1, "cifs_max_pending set to max of 256"); 910 cFYI(1, "cifs_max_pending set to max of 256");
903 } 911 }
904 912
913 rc = cifs_fscache_register();
914 if (rc)
915 goto out;
916
905 rc = cifs_init_inodecache(); 917 rc = cifs_init_inodecache();
906 if (rc) 918 if (rc)
907 goto out_clean_proc; 919 goto out_clean_proc;
@@ -922,27 +934,13 @@ init_cifs(void)
922 if (rc) 934 if (rc)
923 goto out_unregister_filesystem; 935 goto out_unregister_filesystem;
924#endif 936#endif
925#ifdef CONFIG_CIFS_DFS_UPCALL
926 rc = cifs_init_dns_resolver();
927 if (rc)
928 goto out_unregister_key_type;
929#endif
930 rc = slow_work_register_user(THIS_MODULE);
931 if (rc)
932 goto out_unregister_resolver_key;
933 937
934 return 0; 938 return 0;
935 939
936 out_unregister_resolver_key:
937#ifdef CONFIG_CIFS_DFS_UPCALL
938 cifs_exit_dns_resolver();
939 out_unregister_key_type:
940#endif
941#ifdef CONFIG_CIFS_UPCALL 940#ifdef CONFIG_CIFS_UPCALL
942 unregister_key_type(&cifs_spnego_key_type);
943 out_unregister_filesystem: 941 out_unregister_filesystem:
944#endif
945 unregister_filesystem(&cifs_fs_type); 942 unregister_filesystem(&cifs_fs_type);
943#endif
946 out_destroy_request_bufs: 944 out_destroy_request_bufs:
947 cifs_destroy_request_bufs(); 945 cifs_destroy_request_bufs();
948 out_destroy_mids: 946 out_destroy_mids:
@@ -951,6 +949,8 @@ init_cifs(void)
951 cifs_destroy_inodecache(); 949 cifs_destroy_inodecache();
952 out_clean_proc: 950 out_clean_proc:
953 cifs_proc_clean(); 951 cifs_proc_clean();
952 cifs_fscache_unregister();
953 out:
954 return rc; 954 return rc;
955} 955}
956 956
@@ -959,9 +959,9 @@ exit_cifs(void)
959{ 959{
960 cFYI(DBG2, "exit_cifs"); 960 cFYI(DBG2, "exit_cifs");
961 cifs_proc_clean(); 961 cifs_proc_clean();
962 cifs_fscache_unregister();
962#ifdef CONFIG_CIFS_DFS_UPCALL 963#ifdef CONFIG_CIFS_DFS_UPCALL
963 cifs_dfs_release_automount_timer(); 964 cifs_dfs_release_automount_timer();
964 cifs_exit_dns_resolver();
965#endif 965#endif
966#ifdef CONFIG_CIFS_UPCALL 966#ifdef CONFIG_CIFS_UPCALL
967 unregister_key_type(&cifs_spnego_key_type); 967 unregister_key_type(&cifs_spnego_key_type);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index a7eb65c84b1c..d82f5fb4761e 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -114,5 +114,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
114extern const struct export_operations cifs_export_ops; 114extern const struct export_operations cifs_export_ops;
115#endif /* EXPERIMENTAL */ 115#endif /* EXPERIMENTAL */
116 116
117#define CIFS_VERSION "1.64" 117#define CIFS_VERSION "1.65"
118#endif /* _CIFSFS_H */ 118#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index a88479ceaad5..0cdfb8c32ac6 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -16,10 +16,13 @@
16 * the GNU Lesser General Public License for more details. 16 * the GNU Lesser General Public License for more details.
17 * 17 *
18 */ 18 */
19#ifndef _CIFS_GLOB_H
20#define _CIFS_GLOB_H
21
19#include <linux/in.h> 22#include <linux/in.h>
20#include <linux/in6.h> 23#include <linux/in6.h>
21#include <linux/slab.h> 24#include <linux/slab.h>
22#include <linux/slow-work.h> 25#include <linux/workqueue.h>
23#include "cifs_fs_sb.h" 26#include "cifs_fs_sb.h"
24#include "cifsacl.h" 27#include "cifsacl.h"
25/* 28/*
@@ -34,7 +37,7 @@
34#define MAX_SHARE_SIZE 64 /* used to be 20, this should still be enough */ 37#define MAX_SHARE_SIZE 64 /* used to be 20, this should still be enough */
35#define MAX_USERNAME_SIZE 32 /* 32 is to allow for 15 char names + null 38#define MAX_USERNAME_SIZE 32 /* 32 is to allow for 15 char names + null
36 termination then *2 for unicode versions */ 39 termination then *2 for unicode versions */
37#define MAX_PASSWORD_SIZE 16 40#define MAX_PASSWORD_SIZE 512 /* max for windows seems to be 256 wide chars */
38 41
39#define CIFS_MIN_RCV_POOL 4 42#define CIFS_MIN_RCV_POOL 4
40 43
@@ -80,8 +83,7 @@ enum statusEnum {
80}; 83};
81 84
82enum securityEnum { 85enum securityEnum {
83 PLAINTXT = 0, /* Legacy with Plaintext passwords */ 86 LANMAN = 0, /* Legacy LANMAN auth */
84 LANMAN, /* Legacy LANMAN auth */
85 NTLM, /* Legacy NTLM012 auth with NTLM hash */ 87 NTLM, /* Legacy NTLM012 auth with NTLM hash */
86 NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ 88 NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */
87 RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */ 89 RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */
@@ -142,7 +144,6 @@ struct TCP_Server_Info {
142 struct list_head pending_mid_q; 144 struct list_head pending_mid_q;
143 void *Server_NlsInfo; /* BB - placeholder for future NLS info */ 145 void *Server_NlsInfo; /* BB - placeholder for future NLS info */
144 unsigned short server_codepage; /* codepage for the server */ 146 unsigned short server_codepage; /* codepage for the server */
145 unsigned long ip_address; /* IP addr for the server if known */
146 enum protocolEnum protocolType; 147 enum protocolEnum protocolType;
147 char versionMajor; 148 char versionMajor;
148 char versionMinor; 149 char versionMinor;
@@ -190,19 +191,9 @@ struct TCP_Server_Info {
190 bool sec_mskerberos; /* supports legacy MS Kerberos */ 191 bool sec_mskerberos; /* supports legacy MS Kerberos */
191 bool sec_kerberosu2u; /* supports U2U Kerberos */ 192 bool sec_kerberosu2u; /* supports U2U Kerberos */
192 bool sec_ntlmssp; /* supports NTLMSSP */ 193 bool sec_ntlmssp; /* supports NTLMSSP */
193}; 194#ifdef CONFIG_CIFS_FSCACHE
194 195 struct fscache_cookie *fscache; /* client index cache cookie */
195/* 196#endif
196 * The following is our shortcut to user information. We surface the uid,
197 * and name. We always get the password on the fly in case it
198 * has changed. We also hang a list of sessions owned by this user off here.
199 */
200struct cifsUidInfo {
201 struct list_head userList;
202 struct list_head sessionList; /* SMB sessions for this user */
203 uid_t linux_uid;
204 char user[MAX_USERNAME_SIZE + 1]; /* ascii name of user */
205 /* BB may need ptr or callback for PAM or WinBind info */
206}; 197};
207 198
208/* 199/*
@@ -212,9 +203,6 @@ struct cifsSesInfo {
212 struct list_head smb_ses_list; 203 struct list_head smb_ses_list;
213 struct list_head tcon_list; 204 struct list_head tcon_list;
214 struct mutex session_mutex; 205 struct mutex session_mutex;
215#if 0
216 struct cifsUidInfo *uidInfo; /* pointer to user info */
217#endif
218 struct TCP_Server_Info *server; /* pointer to server info */ 206 struct TCP_Server_Info *server; /* pointer to server info */
219 int ses_count; /* reference counter */ 207 int ses_count; /* reference counter */
220 enum statusEnum status; 208 enum statusEnum status;
@@ -226,7 +214,8 @@ struct cifsSesInfo {
226 char *serverNOS; /* name of network operating system of server */ 214 char *serverNOS; /* name of network operating system of server */
227 char *serverDomain; /* security realm of server */ 215 char *serverDomain; /* security realm of server */
228 int Suid; /* remote smb uid */ 216 int Suid; /* remote smb uid */
229 uid_t linux_uid; /* local Linux uid */ 217 uid_t linux_uid; /* overriding owner of files on the mount */
218 uid_t cred_uid; /* owner of credentials */
230 int capabilities; 219 int capabilities;
231 char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for 220 char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for
232 TCP names - will ipv6 and sctp addresses fit? */ 221 TCP names - will ipv6 and sctp addresses fit? */
@@ -311,6 +300,10 @@ struct cifsTconInfo {
311 bool local_lease:1; /* check leases (only) on local system not remote */ 300 bool local_lease:1; /* check leases (only) on local system not remote */
312 bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */ 301 bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */
313 bool need_reconnect:1; /* connection reset, tid now invalid */ 302 bool need_reconnect:1; /* connection reset, tid now invalid */
303#ifdef CONFIG_CIFS_FSCACHE
304 u64 resource_id; /* server resource id */
305 struct fscache_cookie *fscache; /* cookie for share */
306#endif
314 /* BB add field for back pointer to sb struct(s)? */ 307 /* BB add field for back pointer to sb struct(s)? */
315}; 308};
316 309
@@ -363,7 +356,7 @@ struct cifsFileInfo {
363 atomic_t count; /* reference count */ 356 atomic_t count; /* reference count */
364 struct mutex fh_mutex; /* prevents reopen race after dead ses*/ 357 struct mutex fh_mutex; /* prevents reopen race after dead ses*/
365 struct cifs_search_info srch_inf; 358 struct cifs_search_info srch_inf;
366 struct slow_work oplock_break; /* slow_work job for oplock breaks */ 359 struct work_struct oplock_break; /* work for oplock breaks */
367}; 360};
368 361
369/* Take a reference on the file private data */ 362/* Take a reference on the file private data */
@@ -398,6 +391,9 @@ struct cifsInodeInfo {
398 bool invalid_mapping:1; /* pagecache is invalid */ 391 bool invalid_mapping:1; /* pagecache is invalid */
399 u64 server_eof; /* current file size on server */ 392 u64 server_eof; /* current file size on server */
400 u64 uniqueid; /* server inode number */ 393 u64 uniqueid; /* server inode number */
394#ifdef CONFIG_CIFS_FSCACHE
395 struct fscache_cookie *fscache;
396#endif
401 struct inode vfs_inode; 397 struct inode vfs_inode;
402}; 398};
403 399
@@ -732,4 +728,10 @@ GLOBAL_EXTERN unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */
732GLOBAL_EXTERN unsigned int cifs_min_small; /* min size of small buf pool */ 728GLOBAL_EXTERN unsigned int cifs_min_small; /* min size of small buf pool */
733GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/ 729GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/
734 730
731void cifs_oplock_break(struct work_struct *work);
732void cifs_oplock_break_get(struct cifsFileInfo *cfile);
733void cifs_oplock_break_put(struct cifsFileInfo *cfile);
734
735extern const struct slow_work_ops cifs_oplock_break_ops; 735extern const struct slow_work_ops cifs_oplock_break_ops;
736
737#endif /* _CIFS_GLOB_H */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index fb6318b81509..1f5450814087 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -86,7 +86,9 @@ extern unsigned int smbCalcSize(struct smb_hdr *ptr);
86extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr); 86extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
87extern int decode_negTokenInit(unsigned char *security_blob, int length, 87extern int decode_negTokenInit(unsigned char *security_blob, int length,
88 struct TCP_Server_Info *server); 88 struct TCP_Server_Info *server);
89extern int cifs_convert_address(char *src, void *dst); 89extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len);
90extern int cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len,
91 unsigned short int port);
90extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr); 92extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr);
91extern void header_assemble(struct smb_hdr *, char /* command */ , 93extern void header_assemble(struct smb_hdr *, char /* command */ ,
92 const struct cifsTconInfo *, int /* length of 94 const struct cifsTconInfo *, int /* length of
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 2208f06e4c45..95c2ea67edfb 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -48,6 +48,7 @@
48#include "nterr.h" 48#include "nterr.h"
49#include "rfc1002pdu.h" 49#include "rfc1002pdu.h"
50#include "cn_cifs.h" 50#include "cn_cifs.h"
51#include "fscache.h"
51 52
52#define CIFS_PORT 445 53#define CIFS_PORT 445
53#define RFC1001_PORT 139 54#define RFC1001_PORT 139
@@ -66,6 +67,7 @@ struct smb_vol {
66 char *iocharset; /* local code page for mapping to and from Unicode */ 67 char *iocharset; /* local code page for mapping to and from Unicode */
67 char source_rfc1001_name[16]; /* netbios name of client */ 68 char source_rfc1001_name[16]; /* netbios name of client */
68 char target_rfc1001_name[16]; /* netbios name of server for Win9x/ME */ 69 char target_rfc1001_name[16]; /* netbios name of server for Win9x/ME */
70 uid_t cred_uid;
69 uid_t linux_uid; 71 uid_t linux_uid;
70 gid_t linux_gid; 72 gid_t linux_gid;
71 mode_t file_mode; 73 mode_t file_mode;
@@ -97,6 +99,7 @@ struct smb_vol {
97 bool noblocksnd:1; 99 bool noblocksnd:1;
98 bool noautotune:1; 100 bool noautotune:1;
99 bool nostrictsync:1; /* do not force expensive SMBflush on every sync */ 101 bool nostrictsync:1; /* do not force expensive SMBflush on every sync */
102 bool fsc:1; /* enable fscache */
100 unsigned int rsize; 103 unsigned int rsize;
101 unsigned int wsize; 104 unsigned int wsize;
102 bool sockopt_tcp_nodelay:1; 105 bool sockopt_tcp_nodelay:1;
@@ -830,7 +833,8 @@ cifs_parse_mount_options(char *options, const char *devname,
830 /* null target name indicates to use *SMBSERVR default called name 833 /* null target name indicates to use *SMBSERVR default called name
831 if we end up sending RFC1001 session initialize */ 834 if we end up sending RFC1001 session initialize */
832 vol->target_rfc1001_name[0] = 0; 835 vol->target_rfc1001_name[0] = 0;
833 vol->linux_uid = current_uid(); /* use current_euid() instead? */ 836 vol->cred_uid = current_uid();
837 vol->linux_uid = current_uid();
834 vol->linux_gid = current_gid(); 838 vol->linux_gid = current_gid();
835 839
836 /* default to only allowing write access to owner of the mount */ 840 /* default to only allowing write access to owner of the mount */
@@ -1257,6 +1261,12 @@ cifs_parse_mount_options(char *options, const char *devname,
1257 } else if ((strnicmp(data, "nocase", 6) == 0) || 1261 } else if ((strnicmp(data, "nocase", 6) == 0) ||
1258 (strnicmp(data, "ignorecase", 10) == 0)) { 1262 (strnicmp(data, "ignorecase", 10) == 0)) {
1259 vol->nocase = 1; 1263 vol->nocase = 1;
1264 } else if (strnicmp(data, "mand", 4) == 0) {
1265 /* ignore */
1266 } else if (strnicmp(data, "nomand", 6) == 0) {
1267 /* ignore */
1268 } else if (strnicmp(data, "_netdev", 7) == 0) {
1269 /* ignore */
1260 } else if (strnicmp(data, "brl", 3) == 0) { 1270 } else if (strnicmp(data, "brl", 3) == 0) {
1261 vol->nobrl = 0; 1271 vol->nobrl = 0;
1262 } else if ((strnicmp(data, "nobrl", 5) == 0) || 1272 } else if ((strnicmp(data, "nobrl", 5) == 0) ||
@@ -1331,6 +1341,8 @@ cifs_parse_mount_options(char *options, const char *devname,
1331 printk(KERN_WARNING "CIFS: Mount option noac not " 1341 printk(KERN_WARNING "CIFS: Mount option noac not "
1332 "supported. Instead set " 1342 "supported. Instead set "
1333 "/proc/fs/cifs/LookupCacheEnabled to 0\n"); 1343 "/proc/fs/cifs/LookupCacheEnabled to 0\n");
1344 } else if (strnicmp(data, "fsc", 3) == 0) {
1345 vol->fsc = true;
1334 } else 1346 } else
1335 printk(KERN_WARNING "CIFS: Unknown mount option %s\n", 1347 printk(KERN_WARNING "CIFS: Unknown mount option %s\n",
1336 data); 1348 data);
@@ -1380,18 +1392,92 @@ cifs_parse_mount_options(char *options, const char *devname,
1380 return 0; 1392 return 0;
1381} 1393}
1382 1394
1395static bool
1396match_address(struct TCP_Server_Info *server, struct sockaddr *addr)
1397{
1398 struct sockaddr_in *addr4 = (struct sockaddr_in *)addr;
1399 struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr;
1400
1401 switch (addr->sa_family) {
1402 case AF_INET:
1403 if (addr4->sin_addr.s_addr !=
1404 server->addr.sockAddr.sin_addr.s_addr)
1405 return false;
1406 if (addr4->sin_port &&
1407 addr4->sin_port != server->addr.sockAddr.sin_port)
1408 return false;
1409 break;
1410 case AF_INET6:
1411 if (!ipv6_addr_equal(&addr6->sin6_addr,
1412 &server->addr.sockAddr6.sin6_addr))
1413 return false;
1414 if (addr6->sin6_scope_id !=
1415 server->addr.sockAddr6.sin6_scope_id)
1416 return false;
1417 if (addr6->sin6_port &&
1418 addr6->sin6_port != server->addr.sockAddr6.sin6_port)
1419 return false;
1420 break;
1421 }
1422
1423 return true;
1424}
1425
1426static bool
1427match_security(struct TCP_Server_Info *server, struct smb_vol *vol)
1428{
1429 unsigned int secFlags;
1430
1431 if (vol->secFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL)))
1432 secFlags = vol->secFlg;
1433 else
1434 secFlags = global_secflags | vol->secFlg;
1435
1436 switch (server->secType) {
1437 case LANMAN:
1438 if (!(secFlags & (CIFSSEC_MAY_LANMAN|CIFSSEC_MAY_PLNTXT)))
1439 return false;
1440 break;
1441 case NTLMv2:
1442 if (!(secFlags & CIFSSEC_MAY_NTLMV2))
1443 return false;
1444 break;
1445 case NTLM:
1446 if (!(secFlags & CIFSSEC_MAY_NTLM))
1447 return false;
1448 break;
1449 case Kerberos:
1450 if (!(secFlags & CIFSSEC_MAY_KRB5))
1451 return false;
1452 break;
1453 case RawNTLMSSP:
1454 if (!(secFlags & CIFSSEC_MAY_NTLMSSP))
1455 return false;
1456 break;
1457 default:
1458 /* shouldn't happen */
1459 return false;
1460 }
1461
1462 /* now check if signing mode is acceptible */
1463 if ((secFlags & CIFSSEC_MAY_SIGN) == 0 &&
1464 (server->secMode & SECMODE_SIGN_REQUIRED))
1465 return false;
1466 else if (((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) &&
1467 (server->secMode &
1468 (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED)) == 0)
1469 return false;
1470
1471 return true;
1472}
1473
1383static struct TCP_Server_Info * 1474static struct TCP_Server_Info *
1384cifs_find_tcp_session(struct sockaddr_storage *addr, unsigned short int port) 1475cifs_find_tcp_session(struct sockaddr *addr, struct smb_vol *vol)
1385{ 1476{
1386 struct list_head *tmp;
1387 struct TCP_Server_Info *server; 1477 struct TCP_Server_Info *server;
1388 struct sockaddr_in *addr4 = (struct sockaddr_in *) addr;
1389 struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *) addr;
1390 1478
1391 write_lock(&cifs_tcp_ses_lock); 1479 write_lock(&cifs_tcp_ses_lock);
1392 list_for_each(tmp, &cifs_tcp_ses_list) { 1480 list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
1393 server = list_entry(tmp, struct TCP_Server_Info,
1394 tcp_ses_list);
1395 /* 1481 /*
1396 * the demux thread can exit on its own while still in CifsNew 1482 * the demux thread can exit on its own while still in CifsNew
1397 * so don't accept any sockets in that state. Since the 1483 * so don't accept any sockets in that state. Since the
@@ -1401,37 +1487,11 @@ cifs_find_tcp_session(struct sockaddr_storage *addr, unsigned short int port)
1401 if (server->tcpStatus == CifsNew) 1487 if (server->tcpStatus == CifsNew)
1402 continue; 1488 continue;
1403 1489
1404 switch (addr->ss_family) { 1490 if (!match_address(server, addr))
1405 case AF_INET: 1491 continue;
1406 if (addr4->sin_addr.s_addr ==
1407 server->addr.sockAddr.sin_addr.s_addr) {
1408 addr4->sin_port = htons(port);
1409 /* user overrode default port? */
1410 if (addr4->sin_port) {
1411 if (addr4->sin_port !=
1412 server->addr.sockAddr.sin_port)
1413 continue;
1414 }
1415 break;
1416 } else
1417 continue;
1418 1492
1419 case AF_INET6: 1493 if (!match_security(server, vol))
1420 if (ipv6_addr_equal(&addr6->sin6_addr, 1494 continue;
1421 &server->addr.sockAddr6.sin6_addr) &&
1422 (addr6->sin6_scope_id ==
1423 server->addr.sockAddr6.sin6_scope_id)) {
1424 addr6->sin6_port = htons(port);
1425 /* user overrode default port? */
1426 if (addr6->sin6_port) {
1427 if (addr6->sin6_port !=
1428 server->addr.sockAddr6.sin6_port)
1429 continue;
1430 }
1431 break;
1432 } else
1433 continue;
1434 }
1435 1495
1436 ++server->srv_count; 1496 ++server->srv_count;
1437 write_unlock(&cifs_tcp_ses_lock); 1497 write_unlock(&cifs_tcp_ses_lock);
@@ -1460,6 +1520,8 @@ cifs_put_tcp_session(struct TCP_Server_Info *server)
1460 server->tcpStatus = CifsExiting; 1520 server->tcpStatus = CifsExiting;
1461 spin_unlock(&GlobalMid_Lock); 1521 spin_unlock(&GlobalMid_Lock);
1462 1522
1523 cifs_fscache_release_client_cookie(server);
1524
1463 task = xchg(&server->tsk, NULL); 1525 task = xchg(&server->tsk, NULL);
1464 if (task) 1526 if (task)
1465 force_sig(SIGKILL, task); 1527 force_sig(SIGKILL, task);
@@ -1479,7 +1541,10 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1479 cFYI(1, "UNC: %s ip: %s", volume_info->UNC, volume_info->UNCip); 1541 cFYI(1, "UNC: %s ip: %s", volume_info->UNC, volume_info->UNCip);
1480 1542
1481 if (volume_info->UNCip && volume_info->UNC) { 1543 if (volume_info->UNCip && volume_info->UNC) {
1482 rc = cifs_convert_address(volume_info->UNCip, &addr); 1544 rc = cifs_fill_sockaddr((struct sockaddr *)&addr,
1545 volume_info->UNCip,
1546 strlen(volume_info->UNCip),
1547 volume_info->port);
1483 if (!rc) { 1548 if (!rc) {
1484 /* we failed translating address */ 1549 /* we failed translating address */
1485 rc = -EINVAL; 1550 rc = -EINVAL;
@@ -1499,7 +1564,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1499 } 1564 }
1500 1565
1501 /* see if we already have a matching tcp_ses */ 1566 /* see if we already have a matching tcp_ses */
1502 tcp_ses = cifs_find_tcp_session(&addr, volume_info->port); 1567 tcp_ses = cifs_find_tcp_session((struct sockaddr *)&addr, volume_info);
1503 if (tcp_ses) 1568 if (tcp_ses)
1504 return tcp_ses; 1569 return tcp_ses;
1505 1570
@@ -1543,12 +1608,10 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1543 cFYI(1, "attempting ipv6 connect"); 1608 cFYI(1, "attempting ipv6 connect");
1544 /* BB should we allow ipv6 on port 139? */ 1609 /* BB should we allow ipv6 on port 139? */
1545 /* other OS never observed in Wild doing 139 with v6 */ 1610 /* other OS never observed in Wild doing 139 with v6 */
1546 sin_server6->sin6_port = htons(volume_info->port);
1547 memcpy(&tcp_ses->addr.sockAddr6, sin_server6, 1611 memcpy(&tcp_ses->addr.sockAddr6, sin_server6,
1548 sizeof(struct sockaddr_in6)); 1612 sizeof(struct sockaddr_in6));
1549 rc = ipv6_connect(tcp_ses); 1613 rc = ipv6_connect(tcp_ses);
1550 } else { 1614 } else {
1551 sin_server->sin_port = htons(volume_info->port);
1552 memcpy(&tcp_ses->addr.sockAddr, sin_server, 1615 memcpy(&tcp_ses->addr.sockAddr, sin_server,
1553 sizeof(struct sockaddr_in)); 1616 sizeof(struct sockaddr_in));
1554 rc = ipv4_connect(tcp_ses); 1617 rc = ipv4_connect(tcp_ses);
@@ -1577,6 +1640,8 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1577 list_add(&tcp_ses->tcp_ses_list, &cifs_tcp_ses_list); 1640 list_add(&tcp_ses->tcp_ses_list, &cifs_tcp_ses_list);
1578 write_unlock(&cifs_tcp_ses_lock); 1641 write_unlock(&cifs_tcp_ses_lock);
1579 1642
1643 cifs_fscache_get_client_cookie(tcp_ses);
1644
1580 return tcp_ses; 1645 return tcp_ses;
1581 1646
1582out_err: 1647out_err:
@@ -1591,17 +1656,27 @@ out_err:
1591} 1656}
1592 1657
1593static struct cifsSesInfo * 1658static struct cifsSesInfo *
1594cifs_find_smb_ses(struct TCP_Server_Info *server, char *username) 1659cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol)
1595{ 1660{
1596 struct list_head *tmp;
1597 struct cifsSesInfo *ses; 1661 struct cifsSesInfo *ses;
1598 1662
1599 write_lock(&cifs_tcp_ses_lock); 1663 write_lock(&cifs_tcp_ses_lock);
1600 list_for_each(tmp, &server->smb_ses_list) { 1664 list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
1601 ses = list_entry(tmp, struct cifsSesInfo, smb_ses_list); 1665 switch (server->secType) {
1602 if (strncmp(ses->userName, username, MAX_USERNAME_SIZE)) 1666 case Kerberos:
1603 continue; 1667 if (vol->cred_uid != ses->cred_uid)
1604 1668 continue;
1669 break;
1670 default:
1671 /* anything else takes username/password */
1672 if (strncmp(ses->userName, vol->username,
1673 MAX_USERNAME_SIZE))
1674 continue;
1675 if (strlen(vol->username) != 0 &&
1676 strncmp(ses->password, vol->password,
1677 MAX_PASSWORD_SIZE))
1678 continue;
1679 }
1605 ++ses->ses_count; 1680 ++ses->ses_count;
1606 write_unlock(&cifs_tcp_ses_lock); 1681 write_unlock(&cifs_tcp_ses_lock);
1607 return ses; 1682 return ses;
@@ -1643,7 +1718,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
1643 1718
1644 xid = GetXid(); 1719 xid = GetXid();
1645 1720
1646 ses = cifs_find_smb_ses(server, volume_info->username); 1721 ses = cifs_find_smb_ses(server, volume_info);
1647 if (ses) { 1722 if (ses) {
1648 cFYI(1, "Existing smb sess found (status=%d)", ses->status); 1723 cFYI(1, "Existing smb sess found (status=%d)", ses->status);
1649 1724
@@ -1706,6 +1781,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
1706 if (ses->domainName) 1781 if (ses->domainName)
1707 strcpy(ses->domainName, volume_info->domainname); 1782 strcpy(ses->domainName, volume_info->domainname);
1708 } 1783 }
1784 ses->cred_uid = volume_info->cred_uid;
1709 ses->linux_uid = volume_info->linux_uid; 1785 ses->linux_uid = volume_info->linux_uid;
1710 ses->overrideSecFlg = volume_info->secFlg; 1786 ses->overrideSecFlg = volume_info->secFlg;
1711 1787
@@ -1773,6 +1849,7 @@ cifs_put_tcon(struct cifsTconInfo *tcon)
1773 CIFSSMBTDis(xid, tcon); 1849 CIFSSMBTDis(xid, tcon);
1774 _FreeXid(xid); 1850 _FreeXid(xid);
1775 1851
1852 cifs_fscache_release_super_cookie(tcon);
1776 tconInfoFree(tcon); 1853 tconInfoFree(tcon);
1777 cifs_put_smb_ses(ses); 1854 cifs_put_smb_ses(ses);
1778} 1855}
@@ -1843,6 +1920,8 @@ cifs_get_tcon(struct cifsSesInfo *ses, struct smb_vol *volume_info)
1843 list_add(&tcon->tcon_list, &ses->tcon_list); 1920 list_add(&tcon->tcon_list, &ses->tcon_list);
1844 write_unlock(&cifs_tcp_ses_lock); 1921 write_unlock(&cifs_tcp_ses_lock);
1845 1922
1923 cifs_fscache_get_super_cookie(tcon);
1924
1846 return tcon; 1925 return tcon;
1847 1926
1848out_fail: 1927out_fail:
@@ -2397,6 +2476,8 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
2397 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_GID; 2476 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_GID;
2398 if (pvolume_info->dynperm) 2477 if (pvolume_info->dynperm)
2399 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DYNPERM; 2478 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DYNPERM;
2479 if (pvolume_info->fsc)
2480 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_FSCACHE;
2400 if (pvolume_info->direct_io) { 2481 if (pvolume_info->direct_io) {
2401 cFYI(1, "mounting share using direct i/o"); 2482 cFYI(1, "mounting share using direct i/o");
2402 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO; 2483 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO;
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index e7ae78b66fa1..578d88c5b46e 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -130,12 +130,6 @@ cifs_bp_rename_retry:
130 return full_path; 130 return full_path;
131} 131}
132 132
133/*
134 * When called with struct file pointer set to NULL, there is no way we could
135 * update file->private_data, but getting it stuck on openFileList provides a
136 * way to access it from cifs_fill_filedata and thereby set file->private_data
137 * from cifs_open.
138 */
139struct cifsFileInfo * 133struct cifsFileInfo *
140cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle, 134cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle,
141 struct file *file, struct vfsmount *mnt, unsigned int oflags) 135 struct file *file, struct vfsmount *mnt, unsigned int oflags)
@@ -163,7 +157,7 @@ cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle,
163 mutex_init(&pCifsFile->lock_mutex); 157 mutex_init(&pCifsFile->lock_mutex);
164 INIT_LIST_HEAD(&pCifsFile->llist); 158 INIT_LIST_HEAD(&pCifsFile->llist);
165 atomic_set(&pCifsFile->count, 1); 159 atomic_set(&pCifsFile->count, 1);
166 slow_work_init(&pCifsFile->oplock_break, &cifs_oplock_break_ops); 160 INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break);
167 161
168 write_lock(&GlobalSMBSeslock); 162 write_lock(&GlobalSMBSeslock);
169 list_add(&pCifsFile->tlist, &cifs_sb->tcon->openFileList); 163 list_add(&pCifsFile->tlist, &cifs_sb->tcon->openFileList);
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index 49315cbf742d..0eb87026cad3 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -4,6 +4,8 @@
4 * Copyright (c) 2007 Igor Mammedov 4 * Copyright (c) 2007 Igor Mammedov
5 * Author(s): Igor Mammedov (niallain@gmail.com) 5 * Author(s): Igor Mammedov (niallain@gmail.com)
6 * Steve French (sfrench@us.ibm.com) 6 * Steve French (sfrench@us.ibm.com)
7 * Wang Lei (wang840925@gmail.com)
8 * David Howells (dhowells@redhat.com)
7 * 9 *
8 * Contains the CIFS DFS upcall routines used for hostname to 10 * Contains the CIFS DFS upcall routines used for hostname to
9 * IP address translation. 11 * IP address translation.
@@ -24,214 +26,73 @@
24 */ 26 */
25 27
26#include <linux/slab.h> 28#include <linux/slab.h>
27#include <linux/keyctl.h> 29#include <linux/dns_resolver.h>
28#include <linux/key-type.h>
29#include <keys/user-type.h>
30#include "dns_resolve.h" 30#include "dns_resolve.h"
31#include "cifsglob.h" 31#include "cifsglob.h"
32#include "cifsproto.h" 32#include "cifsproto.h"
33#include "cifs_debug.h" 33#include "cifs_debug.h"
34 34
35static const struct cred *dns_resolver_cache; 35/**
36 36 * dns_resolve_server_name_to_ip - Resolve UNC server name to ip address.
37/* Checks if supplied name is IP address 37 * @unc: UNC path specifying the server
38 * returns: 38 * @ip_addr: Where to return the IP address.
39 * 1 - name is IP 39 *
40 * 0 - name is not IP 40 * The IP address will be returned in string form, and the caller is
41 */ 41 * responsible for freeing it.
42static int 42 *
43is_ip(char *name) 43 * Returns length of result on success, -ve on error.
44{
45 struct sockaddr_storage ss;
46
47 return cifs_convert_address(name, &ss);
48}
49
50static int
51dns_resolver_instantiate(struct key *key, const void *data,
52 size_t datalen)
53{
54 int rc = 0;
55 char *ip;
56
57 ip = kmalloc(datalen + 1, GFP_KERNEL);
58 if (!ip)
59 return -ENOMEM;
60
61 memcpy(ip, data, datalen);
62 ip[datalen] = '\0';
63
64 /* make sure this looks like an address */
65 if (!is_ip(ip)) {
66 kfree(ip);
67 return -EINVAL;
68 }
69
70 key->type_data.x[0] = datalen;
71 key->payload.data = ip;
72
73 return rc;
74}
75
76static void
77dns_resolver_destroy(struct key *key)
78{
79 kfree(key->payload.data);
80}
81
82struct key_type key_type_dns_resolver = {
83 .name = "dns_resolver",
84 .def_datalen = sizeof(struct in_addr),
85 .describe = user_describe,
86 .instantiate = dns_resolver_instantiate,
87 .destroy = dns_resolver_destroy,
88 .match = user_match,
89};
90
91/* Resolves server name to ip address.
92 * input:
93 * unc - server UNC
94 * output:
95 * *ip_addr - pointer to server ip, caller responcible for freeing it.
96 * return 0 on success
97 */ 44 */
98int 45int
99dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) 46dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
100{ 47{
101 const struct cred *saved_cred; 48 struct sockaddr_storage ss;
102 int rc = -EAGAIN; 49 const char *hostname, *sep;
103 struct key *rkey = ERR_PTR(-EAGAIN);
104 char *name; 50 char *name;
105 char *data = NULL; 51 int len, rc;
106 int len;
107 52
108 if (!ip_addr || !unc) 53 if (!ip_addr || !unc)
109 return -EINVAL; 54 return -EINVAL;
110 55
111 /* search for server name delimiter */
112 len = strlen(unc); 56 len = strlen(unc);
113 if (len < 3) { 57 if (len < 3) {
114 cFYI(1, "%s: unc is too short: %s", __func__, unc); 58 cFYI(1, "%s: unc is too short: %s", __func__, unc);
115 return -EINVAL; 59 return -EINVAL;
116 } 60 }
117 len -= 2;
118 name = memchr(unc+2, '\\', len);
119 if (!name) {
120 cFYI(1, "%s: probably server name is whole unc: %s",
121 __func__, unc);
122 } else {
123 len = (name - unc) - 2/* leading // */;
124 }
125
126 name = kmalloc(len+1, GFP_KERNEL);
127 if (!name) {
128 rc = -ENOMEM;
129 return rc;
130 }
131 memcpy(name, unc+2, len);
132 name[len] = 0;
133
134 if (is_ip(name)) {
135 cFYI(1, "%s: it is IP, skipping dns upcall: %s",
136 __func__, name);
137 data = name;
138 goto skip_upcall;
139 }
140 61
141 saved_cred = override_creds(dns_resolver_cache); 62 /* Discount leading slashes for cifs */
142 rkey = request_key(&key_type_dns_resolver, name, ""); 63 len -= 2;
143 revert_creds(saved_cred); 64 hostname = unc + 2;
144 if (!IS_ERR(rkey)) {
145 if (!(rkey->perm & KEY_USR_VIEW)) {
146 down_read(&rkey->sem);
147 rkey->perm |= KEY_USR_VIEW;
148 up_read(&rkey->sem);
149 }
150 len = rkey->type_data.x[0];
151 data = rkey->payload.data;
152 } else {
153 cERROR(1, "%s: unable to resolve: %s", __func__, name);
154 goto out;
155 }
156
157skip_upcall:
158 if (data) {
159 *ip_addr = kmalloc(len + 1, GFP_KERNEL);
160 if (*ip_addr) {
161 memcpy(*ip_addr, data, len + 1);
162 if (!IS_ERR(rkey))
163 cFYI(1, "%s: resolved: %s to %s", __func__,
164 name,
165 *ip_addr
166 );
167 rc = 0;
168 } else {
169 rc = -ENOMEM;
170 }
171 if (!IS_ERR(rkey))
172 key_put(rkey);
173 }
174 65
175out: 66 /* Search for server name delimiter */
176 kfree(name); 67 sep = memchr(hostname, '\\', len);
68 if (sep)
69 len = sep - unc;
70 else
71 cFYI(1, "%s: probably server name is whole unc: %s",
72 __func__, unc);
73
74 /* Try to interpret hostname as an IPv4 or IPv6 address */
75 rc = cifs_convert_address((struct sockaddr *)&ss, hostname, len);
76 if (rc > 0)
77 goto name_is_IP_address;
78
79 /* Perform the upcall */
80 rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL);
81 if (rc < 0)
82 cERROR(1, "%s: unable to resolve: %*.*s",
83 __func__, len, len, hostname);
84 else
85 cFYI(1, "%s: resolved: %*.*s to %s",
86 __func__, len, len, hostname, *ip_addr);
177 return rc; 87 return rc;
178}
179 88
180int __init cifs_init_dns_resolver(void) 89name_is_IP_address:
181{ 90 name = kmalloc(len + 1, GFP_KERNEL);
182 struct cred *cred; 91 if (!name)
183 struct key *keyring;
184 int ret;
185
186 printk(KERN_NOTICE "Registering the %s key type\n",
187 key_type_dns_resolver.name);
188
189 /* create an override credential set with a special thread keyring in
190 * which DNS requests are cached
191 *
192 * this is used to prevent malicious redirections from being installed
193 * with add_key().
194 */
195 cred = prepare_kernel_cred(NULL);
196 if (!cred)
197 return -ENOMEM; 92 return -ENOMEM;
198 93 memcpy(name, hostname, len);
199 keyring = key_alloc(&key_type_keyring, ".dns_resolver", 0, 0, cred, 94 name[len] = 0;
200 (KEY_POS_ALL & ~KEY_POS_SETATTR) | 95 cFYI(1, "%s: unc is IP, skipping dns upcall: %s", __func__, name);
201 KEY_USR_VIEW | KEY_USR_READ, 96 *ip_addr = name;
202 KEY_ALLOC_NOT_IN_QUOTA);
203 if (IS_ERR(keyring)) {
204 ret = PTR_ERR(keyring);
205 goto failed_put_cred;
206 }
207
208 ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL);
209 if (ret < 0)
210 goto failed_put_key;
211
212 ret = register_key_type(&key_type_dns_resolver);
213 if (ret < 0)
214 goto failed_put_key;
215
216 /* instruct request_key() to use this special keyring as a cache for
217 * the results it looks up */
218 cred->thread_keyring = keyring;
219 cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
220 dns_resolver_cache = cred;
221 return 0; 97 return 0;
222
223failed_put_key:
224 key_put(keyring);
225failed_put_cred:
226 put_cred(cred);
227 return ret;
228}
229
230void __exit cifs_exit_dns_resolver(void)
231{
232 key_revoke(dns_resolver_cache->thread_keyring);
233 unregister_key_type(&key_type_dns_resolver);
234 put_cred(dns_resolver_cache);
235 printk(KERN_NOTICE "Unregistered %s key type\n",
236 key_type_dns_resolver.name);
237} 98}
diff --git a/fs/cifs/dns_resolve.h b/fs/cifs/dns_resolve.h
index 26b9eaa9f5ee..d3f5d27f4d06 100644
--- a/fs/cifs/dns_resolve.h
+++ b/fs/cifs/dns_resolve.h
@@ -24,8 +24,6 @@
24#define _DNS_RESOLVE_H 24#define _DNS_RESOLVE_H
25 25
26#ifdef __KERNEL__ 26#ifdef __KERNEL__
27extern int __init cifs_init_dns_resolver(void);
28extern void __exit cifs_exit_dns_resolver(void);
29extern int dns_resolve_server_name_to_ip(const char *unc, char **ip_addr); 27extern int dns_resolve_server_name_to_ip(const char *unc, char **ip_addr);
30#endif /* KERNEL */ 28#endif /* KERNEL */
31 29
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 409e4f523e61..db11fdef0e92 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -40,6 +40,7 @@
40#include "cifs_unicode.h" 40#include "cifs_unicode.h"
41#include "cifs_debug.h" 41#include "cifs_debug.h"
42#include "cifs_fs_sb.h" 42#include "cifs_fs_sb.h"
43#include "fscache.h"
43 44
44static inline int cifs_convert_flags(unsigned int flags) 45static inline int cifs_convert_flags(unsigned int flags)
45{ 46{
@@ -282,6 +283,9 @@ int cifs_open(struct inode *inode, struct file *file)
282 CIFSSMBClose(xid, tcon, netfid); 283 CIFSSMBClose(xid, tcon, netfid);
283 rc = -ENOMEM; 284 rc = -ENOMEM;
284 } 285 }
286
287 cifs_fscache_set_inode_cookie(inode, file);
288
285 goto out; 289 goto out;
286 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { 290 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
287 if (tcon->ses->serverNOS) 291 if (tcon->ses->serverNOS)
@@ -373,6 +377,8 @@ int cifs_open(struct inode *inode, struct file *file)
373 goto out; 377 goto out;
374 } 378 }
375 379
380 cifs_fscache_set_inode_cookie(inode, file);
381
376 if (oplock & CIFS_CREATE_ACTION) { 382 if (oplock & CIFS_CREATE_ACTION) {
377 /* time to set mode which we can not set earlier due to 383 /* time to set mode which we can not set earlier due to
378 problems creating new read-only files */ 384 problems creating new read-only files */
@@ -427,7 +433,7 @@ static int cifs_reopen_file(struct file *file, bool can_flush)
427 __u16 netfid; 433 __u16 netfid;
428 434
429 if (file->private_data) 435 if (file->private_data)
430 pCifsFile = (struct cifsFileInfo *)file->private_data; 436 pCifsFile = file->private_data;
431 else 437 else
432 return -EBADF; 438 return -EBADF;
433 439
@@ -565,8 +571,7 @@ int cifs_close(struct inode *inode, struct file *file)
565 int xid, timeout; 571 int xid, timeout;
566 struct cifs_sb_info *cifs_sb; 572 struct cifs_sb_info *cifs_sb;
567 struct cifsTconInfo *pTcon; 573 struct cifsTconInfo *pTcon;
568 struct cifsFileInfo *pSMBFile = 574 struct cifsFileInfo *pSMBFile = file->private_data;
569 (struct cifsFileInfo *)file->private_data;
570 575
571 xid = GetXid(); 576 xid = GetXid();
572 577
@@ -641,8 +646,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
641{ 646{
642 int rc = 0; 647 int rc = 0;
643 int xid; 648 int xid;
644 struct cifsFileInfo *pCFileStruct = 649 struct cifsFileInfo *pCFileStruct = file->private_data;
645 (struct cifsFileInfo *)file->private_data;
646 char *ptmp; 650 char *ptmp;
647 651
648 cFYI(1, "Closedir inode = 0x%p", inode); 652 cFYI(1, "Closedir inode = 0x%p", inode);
@@ -863,8 +867,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
863 length, pfLock, 867 length, pfLock,
864 posix_lock_type, wait_flag); 868 posix_lock_type, wait_flag);
865 } else { 869 } else {
866 struct cifsFileInfo *fid = 870 struct cifsFileInfo *fid = file->private_data;
867 (struct cifsFileInfo *)file->private_data;
868 871
869 if (numLock) { 872 if (numLock) {
870 rc = CIFSSMBLock(xid, tcon, netfid, length, 873 rc = CIFSSMBLock(xid, tcon, netfid, length,
@@ -965,7 +968,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
965 968
966 if (file->private_data == NULL) 969 if (file->private_data == NULL)
967 return -EBADF; 970 return -EBADF;
968 open_file = (struct cifsFileInfo *) file->private_data; 971 open_file = file->private_data;
969 972
970 rc = generic_write_checks(file, poffset, &write_size, 0); 973 rc = generic_write_checks(file, poffset, &write_size, 0);
971 if (rc) 974 if (rc)
@@ -1067,7 +1070,7 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
1067 1070
1068 if (file->private_data == NULL) 1071 if (file->private_data == NULL)
1069 return -EBADF; 1072 return -EBADF;
1070 open_file = (struct cifsFileInfo *)file->private_data; 1073 open_file = file->private_data;
1071 1074
1072 xid = GetXid(); 1075 xid = GetXid();
1073 1076
@@ -1651,8 +1654,7 @@ int cifs_fsync(struct file *file, int datasync)
1651 int xid; 1654 int xid;
1652 int rc = 0; 1655 int rc = 0;
1653 struct cifsTconInfo *tcon; 1656 struct cifsTconInfo *tcon;
1654 struct cifsFileInfo *smbfile = 1657 struct cifsFileInfo *smbfile = file->private_data;
1655 (struct cifsFileInfo *)file->private_data;
1656 struct inode *inode = file->f_path.dentry->d_inode; 1658 struct inode *inode = file->f_path.dentry->d_inode;
1657 1659
1658 xid = GetXid(); 1660 xid = GetXid();
@@ -1756,7 +1758,7 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
1756 FreeXid(xid); 1758 FreeXid(xid);
1757 return rc; 1759 return rc;
1758 } 1760 }
1759 open_file = (struct cifsFileInfo *)file->private_data; 1761 open_file = file->private_data;
1760 1762
1761 if ((file->f_flags & O_ACCMODE) == O_WRONLY) 1763 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1762 cFYI(1, "attempting read on write only file instance"); 1764 cFYI(1, "attempting read on write only file instance");
@@ -1837,7 +1839,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1837 FreeXid(xid); 1839 FreeXid(xid);
1838 return rc; 1840 return rc;
1839 } 1841 }
1840 open_file = (struct cifsFileInfo *)file->private_data; 1842 open_file = file->private_data;
1841 1843
1842 if ((file->f_flags & O_ACCMODE) == O_WRONLY) 1844 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1843 cFYI(1, "attempting read on write only file instance"); 1845 cFYI(1, "attempting read on write only file instance");
@@ -1942,6 +1944,9 @@ static void cifs_copy_cache_pages(struct address_space *mapping,
1942 SetPageUptodate(page); 1944 SetPageUptodate(page);
1943 unlock_page(page); 1945 unlock_page(page);
1944 data += PAGE_CACHE_SIZE; 1946 data += PAGE_CACHE_SIZE;
1947
1948 /* add page to FS-Cache */
1949 cifs_readpage_to_fscache(mapping->host, page);
1945 } 1950 }
1946 return; 1951 return;
1947} 1952}
@@ -1968,10 +1973,19 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
1968 FreeXid(xid); 1973 FreeXid(xid);
1969 return rc; 1974 return rc;
1970 } 1975 }
1971 open_file = (struct cifsFileInfo *)file->private_data; 1976 open_file = file->private_data;
1972 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 1977 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1973 pTcon = cifs_sb->tcon; 1978 pTcon = cifs_sb->tcon;
1974 1979
1980 /*
1981 * Reads as many pages as possible from fscache. Returns -ENOBUFS
1982 * immediately if the cookie is negative
1983 */
1984 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
1985 &num_pages);
1986 if (rc == 0)
1987 goto read_complete;
1988
1975 cFYI(DBG2, "rpages: num pages %d", num_pages); 1989 cFYI(DBG2, "rpages: num pages %d", num_pages);
1976 for (i = 0; i < num_pages; ) { 1990 for (i = 0; i < num_pages; ) {
1977 unsigned contig_pages; 1991 unsigned contig_pages;
@@ -2082,6 +2096,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
2082 smb_read_data = NULL; 2096 smb_read_data = NULL;
2083 } 2097 }
2084 2098
2099read_complete:
2085 FreeXid(xid); 2100 FreeXid(xid);
2086 return rc; 2101 return rc;
2087} 2102}
@@ -2092,6 +2107,11 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
2092 char *read_data; 2107 char *read_data;
2093 int rc; 2108 int rc;
2094 2109
2110 /* Is the page cached? */
2111 rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
2112 if (rc == 0)
2113 goto read_complete;
2114
2095 page_cache_get(page); 2115 page_cache_get(page);
2096 read_data = kmap(page); 2116 read_data = kmap(page);
2097 /* for reads over a certain size could initiate async read ahead */ 2117 /* for reads over a certain size could initiate async read ahead */
@@ -2111,11 +2131,17 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
2111 2131
2112 flush_dcache_page(page); 2132 flush_dcache_page(page);
2113 SetPageUptodate(page); 2133 SetPageUptodate(page);
2134
2135 /* send this page to the cache */
2136 cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);
2137
2114 rc = 0; 2138 rc = 0;
2115 2139
2116io_error: 2140io_error:
2117 kunmap(page); 2141 kunmap(page);
2118 page_cache_release(page); 2142 page_cache_release(page);
2143
2144read_complete:
2119 return rc; 2145 return rc;
2120} 2146}
2121 2147
@@ -2265,8 +2291,23 @@ out:
2265 return rc; 2291 return rc;
2266} 2292}
2267 2293
2268static void 2294static int cifs_release_page(struct page *page, gfp_t gfp)
2269cifs_oplock_break(struct slow_work *work) 2295{
2296 if (PagePrivate(page))
2297 return 0;
2298
2299 return cifs_fscache_release_page(page, gfp);
2300}
2301
2302static void cifs_invalidate_page(struct page *page, unsigned long offset)
2303{
2304 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
2305
2306 if (offset == 0)
2307 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
2308}
2309
2310void cifs_oplock_break(struct work_struct *work)
2270{ 2311{
2271 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, 2312 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
2272 oplock_break); 2313 oplock_break);
@@ -2303,33 +2344,30 @@ cifs_oplock_break(struct slow_work *work)
2303 LOCKING_ANDX_OPLOCK_RELEASE, false); 2344 LOCKING_ANDX_OPLOCK_RELEASE, false);
2304 cFYI(1, "Oplock release rc = %d", rc); 2345 cFYI(1, "Oplock release rc = %d", rc);
2305 } 2346 }
2347
2348 /*
2349 * We might have kicked in before is_valid_oplock_break()
2350 * finished grabbing reference for us. Make sure it's done by
2351 * waiting for GlobalSMBSeslock.
2352 */
2353 write_lock(&GlobalSMBSeslock);
2354 write_unlock(&GlobalSMBSeslock);
2355
2356 cifs_oplock_break_put(cfile);
2306} 2357}
2307 2358
2308static int 2359void cifs_oplock_break_get(struct cifsFileInfo *cfile)
2309cifs_oplock_break_get(struct slow_work *work)
2310{ 2360{
2311 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
2312 oplock_break);
2313 mntget(cfile->mnt); 2361 mntget(cfile->mnt);
2314 cifsFileInfo_get(cfile); 2362 cifsFileInfo_get(cfile);
2315 return 0;
2316} 2363}
2317 2364
2318static void 2365void cifs_oplock_break_put(struct cifsFileInfo *cfile)
2319cifs_oplock_break_put(struct slow_work *work)
2320{ 2366{
2321 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
2322 oplock_break);
2323 mntput(cfile->mnt); 2367 mntput(cfile->mnt);
2324 cifsFileInfo_put(cfile); 2368 cifsFileInfo_put(cfile);
2325} 2369}
2326 2370
2327const struct slow_work_ops cifs_oplock_break_ops = {
2328 .get_ref = cifs_oplock_break_get,
2329 .put_ref = cifs_oplock_break_put,
2330 .execute = cifs_oplock_break,
2331};
2332
2333const struct address_space_operations cifs_addr_ops = { 2371const struct address_space_operations cifs_addr_ops = {
2334 .readpage = cifs_readpage, 2372 .readpage = cifs_readpage,
2335 .readpages = cifs_readpages, 2373 .readpages = cifs_readpages,
@@ -2338,6 +2376,8 @@ const struct address_space_operations cifs_addr_ops = {
2338 .write_begin = cifs_write_begin, 2376 .write_begin = cifs_write_begin,
2339 .write_end = cifs_write_end, 2377 .write_end = cifs_write_end,
2340 .set_page_dirty = __set_page_dirty_nobuffers, 2378 .set_page_dirty = __set_page_dirty_nobuffers,
2379 .releasepage = cifs_release_page,
2380 .invalidatepage = cifs_invalidate_page,
2341 /* .sync_page = cifs_sync_page, */ 2381 /* .sync_page = cifs_sync_page, */
2342 /* .direct_IO = */ 2382 /* .direct_IO = */
2343}; 2383};
@@ -2354,6 +2394,8 @@ const struct address_space_operations cifs_addr_ops_smallbuf = {
2354 .write_begin = cifs_write_begin, 2394 .write_begin = cifs_write_begin,
2355 .write_end = cifs_write_end, 2395 .write_end = cifs_write_end,
2356 .set_page_dirty = __set_page_dirty_nobuffers, 2396 .set_page_dirty = __set_page_dirty_nobuffers,
2397 .releasepage = cifs_release_page,
2398 .invalidatepage = cifs_invalidate_page,
2357 /* .sync_page = cifs_sync_page, */ 2399 /* .sync_page = cifs_sync_page, */
2358 /* .direct_IO = */ 2400 /* .direct_IO = */
2359}; 2401};
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
new file mode 100644
index 000000000000..9f3f5c4be161
--- /dev/null
+++ b/fs/cifs/fscache.c
@@ -0,0 +1,236 @@
1/*
2 * fs/cifs/fscache.c - CIFS filesystem cache interface
3 *
4 * Copyright (c) 2010 Novell, Inc.
5 * Author(s): Suresh Jayaraman <sjayaraman@suse.de>
6 *
7 * This library is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published
9 * by the Free Software Foundation; either version 2.1 of the License, or
10 * (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
15 * the GNU Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this library; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21#include "fscache.h"
22#include "cifsglob.h"
23#include "cifs_debug.h"
24#include "cifs_fs_sb.h"
25
26void cifs_fscache_get_client_cookie(struct TCP_Server_Info *server)
27{
28 server->fscache =
29 fscache_acquire_cookie(cifs_fscache_netfs.primary_index,
30 &cifs_fscache_server_index_def, server);
31 cFYI(1, "CIFS: get client cookie (0x%p/0x%p)", server,
32 server->fscache);
33}
34
35void cifs_fscache_release_client_cookie(struct TCP_Server_Info *server)
36{
37 cFYI(1, "CIFS: release client cookie (0x%p/0x%p)", server,
38 server->fscache);
39 fscache_relinquish_cookie(server->fscache, 0);
40 server->fscache = NULL;
41}
42
43void cifs_fscache_get_super_cookie(struct cifsTconInfo *tcon)
44{
45 struct TCP_Server_Info *server = tcon->ses->server;
46
47 tcon->fscache =
48 fscache_acquire_cookie(server->fscache,
49 &cifs_fscache_super_index_def, tcon);
50 cFYI(1, "CIFS: get superblock cookie (0x%p/0x%p)",
51 server->fscache, tcon->fscache);
52}
53
54void cifs_fscache_release_super_cookie(struct cifsTconInfo *tcon)
55{
56 cFYI(1, "CIFS: releasing superblock cookie (0x%p)", tcon->fscache);
57 fscache_relinquish_cookie(tcon->fscache, 0);
58 tcon->fscache = NULL;
59}
60
61static void cifs_fscache_enable_inode_cookie(struct inode *inode)
62{
63 struct cifsInodeInfo *cifsi = CIFS_I(inode);
64 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
65
66 if (cifsi->fscache)
67 return;
68
69 cifsi->fscache = fscache_acquire_cookie(cifs_sb->tcon->fscache,
70 &cifs_fscache_inode_object_def,
71 cifsi);
72 cFYI(1, "CIFS: got FH cookie (0x%p/0x%p)",
73 cifs_sb->tcon->fscache, cifsi->fscache);
74}
75
76void cifs_fscache_release_inode_cookie(struct inode *inode)
77{
78 struct cifsInodeInfo *cifsi = CIFS_I(inode);
79
80 if (cifsi->fscache) {
81 cFYI(1, "CIFS releasing inode cookie (0x%p)",
82 cifsi->fscache);
83 fscache_relinquish_cookie(cifsi->fscache, 0);
84 cifsi->fscache = NULL;
85 }
86}
87
88static void cifs_fscache_disable_inode_cookie(struct inode *inode)
89{
90 struct cifsInodeInfo *cifsi = CIFS_I(inode);
91
92 if (cifsi->fscache) {
93 cFYI(1, "CIFS disabling inode cookie (0x%p)",
94 cifsi->fscache);
95 fscache_relinquish_cookie(cifsi->fscache, 1);
96 cifsi->fscache = NULL;
97 }
98}
99
100void cifs_fscache_set_inode_cookie(struct inode *inode, struct file *filp)
101{
102 if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
103 cifs_fscache_disable_inode_cookie(inode);
104 else {
105 cifs_fscache_enable_inode_cookie(inode);
106 cFYI(1, "CIFS: fscache inode cookie set");
107 }
108}
109
110void cifs_fscache_reset_inode_cookie(struct inode *inode)
111{
112 struct cifsInodeInfo *cifsi = CIFS_I(inode);
113 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
114 struct fscache_cookie *old = cifsi->fscache;
115
116 if (cifsi->fscache) {
117 /* retire the current fscache cache and get a new one */
118 fscache_relinquish_cookie(cifsi->fscache, 1);
119
120 cifsi->fscache = fscache_acquire_cookie(cifs_sb->tcon->fscache,
121 &cifs_fscache_inode_object_def,
122 cifsi);
123 cFYI(1, "CIFS: new cookie 0x%p oldcookie 0x%p",
124 cifsi->fscache, old);
125 }
126}
127
128int cifs_fscache_release_page(struct page *page, gfp_t gfp)
129{
130 if (PageFsCache(page)) {
131 struct inode *inode = page->mapping->host;
132 struct cifsInodeInfo *cifsi = CIFS_I(inode);
133
134 cFYI(1, "CIFS: fscache release page (0x%p/0x%p)",
135 page, cifsi->fscache);
136 if (!fscache_maybe_release_page(cifsi->fscache, page, gfp))
137 return 0;
138 }
139
140 return 1;
141}
142
143static void cifs_readpage_from_fscache_complete(struct page *page, void *ctx,
144 int error)
145{
146 cFYI(1, "CFS: readpage_from_fscache_complete (0x%p/%d)",
147 page, error);
148 if (!error)
149 SetPageUptodate(page);
150 unlock_page(page);
151}
152
153/*
154 * Retrieve a page from FS-Cache
155 */
156int __cifs_readpage_from_fscache(struct inode *inode, struct page *page)
157{
158 int ret;
159
160 cFYI(1, "CIFS: readpage_from_fscache(fsc:%p, p:%p, i:0x%p",
161 CIFS_I(inode)->fscache, page, inode);
162 ret = fscache_read_or_alloc_page(CIFS_I(inode)->fscache, page,
163 cifs_readpage_from_fscache_complete,
164 NULL,
165 GFP_KERNEL);
166 switch (ret) {
167
168 case 0: /* page found in fscache, read submitted */
169 cFYI(1, "CIFS: readpage_from_fscache: submitted");
170 return ret;
171 case -ENOBUFS: /* page won't be cached */
172 case -ENODATA: /* page not in cache */
173 cFYI(1, "CIFS: readpage_from_fscache %d", ret);
174 return 1;
175
176 default:
177 cERROR(1, "unknown error ret = %d", ret);
178 }
179 return ret;
180}
181
182/*
183 * Retrieve a set of pages from FS-Cache
184 */
185int __cifs_readpages_from_fscache(struct inode *inode,
186 struct address_space *mapping,
187 struct list_head *pages,
188 unsigned *nr_pages)
189{
190 int ret;
191
192 cFYI(1, "CIFS: __cifs_readpages_from_fscache (0x%p/%u/0x%p)",
193 CIFS_I(inode)->fscache, *nr_pages, inode);
194 ret = fscache_read_or_alloc_pages(CIFS_I(inode)->fscache, mapping,
195 pages, nr_pages,
196 cifs_readpage_from_fscache_complete,
197 NULL,
198 mapping_gfp_mask(mapping));
199 switch (ret) {
200 case 0: /* read submitted to the cache for all pages */
201 cFYI(1, "CIFS: readpages_from_fscache: submitted");
202 return ret;
203
204 case -ENOBUFS: /* some pages are not cached and can't be */
205 case -ENODATA: /* some pages are not cached */
206 cFYI(1, "CIFS: readpages_from_fscache: no page");
207 return 1;
208
209 default:
210 cFYI(1, "unknown error ret = %d", ret);
211 }
212
213 return ret;
214}
215
216void __cifs_readpage_to_fscache(struct inode *inode, struct page *page)
217{
218 int ret;
219
220 cFYI(1, "CIFS: readpage_to_fscache(fsc: %p, p: %p, i: %p",
221 CIFS_I(inode)->fscache, page, inode);
222 ret = fscache_write_page(CIFS_I(inode)->fscache, page, GFP_KERNEL);
223 if (ret != 0)
224 fscache_uncache_page(CIFS_I(inode)->fscache, page);
225}
226
227void __cifs_fscache_invalidate_page(struct page *page, struct inode *inode)
228{
229 struct cifsInodeInfo *cifsi = CIFS_I(inode);
230 struct fscache_cookie *cookie = cifsi->fscache;
231
232 cFYI(1, "CIFS: fscache invalidatepage (0x%p/0x%p)", page, cookie);
233 fscache_wait_on_page_write(cookie, page);
234 fscache_uncache_page(cookie, page);
235}
236
diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h
new file mode 100644
index 000000000000..31b88ec2341e
--- /dev/null
+++ b/fs/cifs/fscache.h
@@ -0,0 +1,136 @@
1/*
2 * fs/cifs/fscache.h - CIFS filesystem cache interface definitions
3 *
4 * Copyright (c) 2010 Novell, Inc.
5 * Author(s): Suresh Jayaraman <sjayaraman@suse.de>
6 *
7 * This library is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published
9 * by the Free Software Foundation; either version 2.1 of the License, or
10 * (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
15 * the GNU Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this library; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21#ifndef _CIFS_FSCACHE_H
22#define _CIFS_FSCACHE_H
23
24#include <linux/fscache.h>
25
26#include "cifsglob.h"
27
28#ifdef CONFIG_CIFS_FSCACHE
29
30extern struct fscache_netfs cifs_fscache_netfs;
31extern const struct fscache_cookie_def cifs_fscache_server_index_def;
32extern const struct fscache_cookie_def cifs_fscache_super_index_def;
33extern const struct fscache_cookie_def cifs_fscache_inode_object_def;
34
35extern int cifs_fscache_register(void);
36extern void cifs_fscache_unregister(void);
37
38/*
39 * fscache.c
40 */
41extern void cifs_fscache_get_client_cookie(struct TCP_Server_Info *);
42extern void cifs_fscache_release_client_cookie(struct TCP_Server_Info *);
43extern void cifs_fscache_get_super_cookie(struct cifsTconInfo *);
44extern void cifs_fscache_release_super_cookie(struct cifsTconInfo *);
45
46extern void cifs_fscache_release_inode_cookie(struct inode *);
47extern void cifs_fscache_set_inode_cookie(struct inode *, struct file *);
48extern void cifs_fscache_reset_inode_cookie(struct inode *);
49
50extern void __cifs_fscache_invalidate_page(struct page *, struct inode *);
51extern int cifs_fscache_release_page(struct page *page, gfp_t gfp);
52extern int __cifs_readpage_from_fscache(struct inode *, struct page *);
53extern int __cifs_readpages_from_fscache(struct inode *,
54 struct address_space *,
55 struct list_head *,
56 unsigned *);
57
58extern void __cifs_readpage_to_fscache(struct inode *, struct page *);
59
60static inline void cifs_fscache_invalidate_page(struct page *page,
61 struct inode *inode)
62{
63 if (PageFsCache(page))
64 __cifs_fscache_invalidate_page(page, inode);
65}
66
67static inline int cifs_readpage_from_fscache(struct inode *inode,
68 struct page *page)
69{
70 if (CIFS_I(inode)->fscache)
71 return __cifs_readpage_from_fscache(inode, page);
72
73 return -ENOBUFS;
74}
75
76static inline int cifs_readpages_from_fscache(struct inode *inode,
77 struct address_space *mapping,
78 struct list_head *pages,
79 unsigned *nr_pages)
80{
81 if (CIFS_I(inode)->fscache)
82 return __cifs_readpages_from_fscache(inode, mapping, pages,
83 nr_pages);
84 return -ENOBUFS;
85}
86
87static inline void cifs_readpage_to_fscache(struct inode *inode,
88 struct page *page)
89{
90 if (PageFsCache(page))
91 __cifs_readpage_to_fscache(inode, page);
92}
93
94#else /* CONFIG_CIFS_FSCACHE */
95static inline int cifs_fscache_register(void) { return 0; }
96static inline void cifs_fscache_unregister(void) {}
97
98static inline void
99cifs_fscache_get_client_cookie(struct TCP_Server_Info *server) {}
100static inline void
101cifs_fscache_release_client_cookie(struct TCP_Server_Info *server) {}
102static inline void cifs_fscache_get_super_cookie(struct cifsTconInfo *tcon) {}
103static inline void
104cifs_fscache_release_super_cookie(struct cifsTconInfo *tcon) {}
105
106static inline void cifs_fscache_release_inode_cookie(struct inode *inode) {}
107static inline void cifs_fscache_set_inode_cookie(struct inode *inode,
108 struct file *filp) {}
109static inline void cifs_fscache_reset_inode_cookie(struct inode *inode) {}
110static inline int cifs_fscache_release_page(struct page *page, gfp_t gfp)
111{
112 return 1; /* May release page */
113}
114
115static inline void cifs_fscache_invalidate_page(struct page *page,
116 struct inode *inode) {}
117static inline int
118cifs_readpage_from_fscache(struct inode *inode, struct page *page)
119{
120 return -ENOBUFS;
121}
122
123static inline int cifs_readpages_from_fscache(struct inode *inode,
124 struct address_space *mapping,
125 struct list_head *pages,
126 unsigned *nr_pages)
127{
128 return -ENOBUFS;
129}
130
131static inline void cifs_readpage_to_fscache(struct inode *inode,
132 struct page *page) {}
133
134#endif /* CONFIG_CIFS_FSCACHE */
135
136#endif /* _CIFS_FSCACHE_H */
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 6f0683c68952..4bc47e5b5f29 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -29,6 +29,7 @@
29#include "cifsproto.h" 29#include "cifsproto.h"
30#include "cifs_debug.h" 30#include "cifs_debug.h"
31#include "cifs_fs_sb.h" 31#include "cifs_fs_sb.h"
32#include "fscache.h"
32 33
33 34
34static void cifs_set_ops(struct inode *inode, const bool is_dfs_referral) 35static void cifs_set_ops(struct inode *inode, const bool is_dfs_referral)
@@ -288,7 +289,7 @@ int cifs_get_file_info_unix(struct file *filp)
288 struct inode *inode = filp->f_path.dentry->d_inode; 289 struct inode *inode = filp->f_path.dentry->d_inode;
289 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 290 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
290 struct cifsTconInfo *tcon = cifs_sb->tcon; 291 struct cifsTconInfo *tcon = cifs_sb->tcon;
291 struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data; 292 struct cifsFileInfo *cfile = filp->private_data;
292 293
293 xid = GetXid(); 294 xid = GetXid();
294 rc = CIFSSMBUnixQFileInfo(xid, tcon, cfile->netfid, &find_data); 295 rc = CIFSSMBUnixQFileInfo(xid, tcon, cfile->netfid, &find_data);
@@ -515,7 +516,7 @@ int cifs_get_file_info(struct file *filp)
515 struct inode *inode = filp->f_path.dentry->d_inode; 516 struct inode *inode = filp->f_path.dentry->d_inode;
516 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 517 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
517 struct cifsTconInfo *tcon = cifs_sb->tcon; 518 struct cifsTconInfo *tcon = cifs_sb->tcon;
518 struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data; 519 struct cifsFileInfo *cfile = filp->private_data;
519 520
520 xid = GetXid(); 521 xid = GetXid();
521 rc = CIFSSMBQFileInfo(xid, tcon, cfile->netfid, &find_data); 522 rc = CIFSSMBQFileInfo(xid, tcon, cfile->netfid, &find_data);
@@ -723,18 +724,17 @@ cifs_find_inode(struct inode *inode, void *opaque)
723{ 724{
724 struct cifs_fattr *fattr = (struct cifs_fattr *) opaque; 725 struct cifs_fattr *fattr = (struct cifs_fattr *) opaque;
725 726
727 /* don't match inode with different uniqueid */
726 if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid) 728 if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid)
727 return 0; 729 return 0;
728 730
729 /* 731 /* don't match inode of different type */
730 * uh oh -- it's a directory. We can't use it since hardlinked dirs are 732 if ((inode->i_mode & S_IFMT) != (fattr->cf_mode & S_IFMT))
731 * verboten. Disable serverino and return it as if it were found, the 733 return 0;
732 * caller can discard it, generate a uniqueid and retry the find 734
733 */ 735 /* if it's a directory that already has dentries, then flag it */
734 if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry)) { 736 if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry))
735 fattr->cf_flags |= CIFS_FATTR_INO_COLLISION; 737 fattr->cf_flags |= CIFS_FATTR_INO_COLLISION;
736 cifs_autodisable_serverino(CIFS_SB(inode->i_sb));
737 }
738 738
739 return 1; 739 return 1;
740} 740}
@@ -748,6 +748,27 @@ cifs_init_inode(struct inode *inode, void *opaque)
748 return 0; 748 return 0;
749} 749}
750 750
751/*
752 * walk dentry list for an inode and report whether it has aliases that
753 * are hashed. We use this to determine if a directory inode can actually
754 * be used.
755 */
756static bool
757inode_has_hashed_dentries(struct inode *inode)
758{
759 struct dentry *dentry;
760
761 spin_lock(&dcache_lock);
762 list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
763 if (!d_unhashed(dentry) || IS_ROOT(dentry)) {
764 spin_unlock(&dcache_lock);
765 return true;
766 }
767 }
768 spin_unlock(&dcache_lock);
769 return false;
770}
771
751/* Given fattrs, get a corresponding inode */ 772/* Given fattrs, get a corresponding inode */
752struct inode * 773struct inode *
753cifs_iget(struct super_block *sb, struct cifs_fattr *fattr) 774cifs_iget(struct super_block *sb, struct cifs_fattr *fattr)
@@ -763,12 +784,16 @@ retry_iget5_locked:
763 784
764 inode = iget5_locked(sb, hash, cifs_find_inode, cifs_init_inode, fattr); 785 inode = iget5_locked(sb, hash, cifs_find_inode, cifs_init_inode, fattr);
765 if (inode) { 786 if (inode) {
766 /* was there a problematic inode number collision? */ 787 /* was there a potentially problematic inode collision? */
767 if (fattr->cf_flags & CIFS_FATTR_INO_COLLISION) { 788 if (fattr->cf_flags & CIFS_FATTR_INO_COLLISION) {
768 iput(inode);
769 fattr->cf_uniqueid = iunique(sb, ROOT_I);
770 fattr->cf_flags &= ~CIFS_FATTR_INO_COLLISION; 789 fattr->cf_flags &= ~CIFS_FATTR_INO_COLLISION;
771 goto retry_iget5_locked; 790
791 if (inode_has_hashed_dentries(inode)) {
792 cifs_autodisable_serverino(CIFS_SB(sb));
793 iput(inode);
794 fattr->cf_uniqueid = iunique(sb, ROOT_I);
795 goto retry_iget5_locked;
796 }
772 } 797 }
773 798
774 cifs_fattr_to_inode(inode, fattr); 799 cifs_fattr_to_inode(inode, fattr);
@@ -776,6 +801,10 @@ retry_iget5_locked:
776 inode->i_flags |= S_NOATIME | S_NOCMTIME; 801 inode->i_flags |= S_NOATIME | S_NOCMTIME;
777 if (inode->i_state & I_NEW) { 802 if (inode->i_state & I_NEW) {
778 inode->i_ino = hash; 803 inode->i_ino = hash;
804#ifdef CONFIG_CIFS_FSCACHE
805 /* initialize per-inode cache cookie pointer */
806 CIFS_I(inode)->fscache = NULL;
807#endif
779 unlock_new_inode(inode); 808 unlock_new_inode(inode);
780 } 809 }
781 } 810 }
@@ -807,6 +836,11 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
807 if (!inode) 836 if (!inode)
808 return ERR_PTR(-ENOMEM); 837 return ERR_PTR(-ENOMEM);
809 838
839#ifdef CONFIG_CIFS_FSCACHE
840 /* populate tcon->resource_id */
841 cifs_sb->tcon->resource_id = CIFS_I(inode)->uniqueid;
842#endif
843
810 if (rc && cifs_sb->tcon->ipc) { 844 if (rc && cifs_sb->tcon->ipc) {
811 cFYI(1, "ipc connection - fake read inode"); 845 cFYI(1, "ipc connection - fake read inode");
812 inode->i_mode |= S_IFDIR; 846 inode->i_mode |= S_IFDIR;
@@ -1568,6 +1602,7 @@ cifs_invalidate_mapping(struct inode *inode)
1568 cifs_i->write_behind_rc = rc; 1602 cifs_i->write_behind_rc = rc;
1569 } 1603 }
1570 invalidate_remote_inode(inode); 1604 invalidate_remote_inode(inode);
1605 cifs_fscache_reset_inode_cookie(inode);
1571} 1606}
1572 1607
1573int cifs_revalidate_file(struct file *filp) 1608int cifs_revalidate_file(struct file *filp)
@@ -1663,26 +1698,16 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from)
1663 return rc; 1698 return rc;
1664} 1699}
1665 1700
1666static int cifs_vmtruncate(struct inode *inode, loff_t offset) 1701static void cifs_setsize(struct inode *inode, loff_t offset)
1667{ 1702{
1668 loff_t oldsize; 1703 loff_t oldsize;
1669 int err;
1670 1704
1671 spin_lock(&inode->i_lock); 1705 spin_lock(&inode->i_lock);
1672 err = inode_newsize_ok(inode, offset);
1673 if (err) {
1674 spin_unlock(&inode->i_lock);
1675 goto out;
1676 }
1677
1678 oldsize = inode->i_size; 1706 oldsize = inode->i_size;
1679 i_size_write(inode, offset); 1707 i_size_write(inode, offset);
1680 spin_unlock(&inode->i_lock); 1708 spin_unlock(&inode->i_lock);
1709
1681 truncate_pagecache(inode, oldsize, offset); 1710 truncate_pagecache(inode, oldsize, offset);
1682 if (inode->i_op->truncate)
1683 inode->i_op->truncate(inode);
1684out:
1685 return err;
1686} 1711}
1687 1712
1688static int 1713static int
@@ -1755,7 +1780,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
1755 1780
1756 if (rc == 0) { 1781 if (rc == 0) {
1757 cifsInode->server_eof = attrs->ia_size; 1782 cifsInode->server_eof = attrs->ia_size;
1758 rc = cifs_vmtruncate(inode, attrs->ia_size); 1783 cifs_setsize(inode, attrs->ia_size);
1759 cifs_truncate_page(inode->i_mapping, inode->i_size); 1784 cifs_truncate_page(inode->i_mapping, inode->i_size);
1760 } 1785 }
1761 1786
@@ -1780,14 +1805,12 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
1780 1805
1781 xid = GetXid(); 1806 xid = GetXid();
1782 1807
1783 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) { 1808 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
1784 /* check if we have permission to change attrs */ 1809 attrs->ia_valid |= ATTR_FORCE;
1785 rc = inode_change_ok(inode, attrs); 1810
1786 if (rc < 0) 1811 rc = inode_change_ok(inode, attrs);
1787 goto out; 1812 if (rc < 0)
1788 else 1813 goto out;
1789 rc = 0;
1790 }
1791 1814
1792 full_path = build_path_from_dentry(direntry); 1815 full_path = build_path_from_dentry(direntry);
1793 if (full_path == NULL) { 1816 if (full_path == NULL) {
@@ -1873,18 +1896,24 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
1873 CIFS_MOUNT_MAP_SPECIAL_CHR); 1896 CIFS_MOUNT_MAP_SPECIAL_CHR);
1874 } 1897 }
1875 1898
1876 if (!rc) { 1899 if (rc)
1877 rc = inode_setattr(inode, attrs); 1900 goto out;
1878 1901
1879 /* force revalidate when any of these times are set since some 1902 if ((attrs->ia_valid & ATTR_SIZE) &&
1880 of the fs types (eg ext3, fat) do not have fine enough 1903 attrs->ia_size != i_size_read(inode))
1881 time granularity to match protocol, and we do not have a 1904 truncate_setsize(inode, attrs->ia_size);
1882 a way (yet) to query the server fs's time granularity (and 1905
1883 whether it rounds times down). 1906 setattr_copy(inode, attrs);
1884 */ 1907 mark_inode_dirty(inode);
1885 if (!rc && (attrs->ia_valid & (ATTR_MTIME | ATTR_CTIME))) 1908
1886 cifsInode->time = 0; 1909 /* force revalidate when any of these times are set since some
1887 } 1910 of the fs types (eg ext3, fat) do not have fine enough
1911 time granularity to match protocol, and we do not have
1912 a way (yet) to query the server fs's time granularity (and
1913 whether it rounds times down).
1914 */
1915 if (attrs->ia_valid & (ATTR_MTIME | ATTR_CTIME))
1916 cifsInode->time = 0;
1888out: 1917out:
1889 kfree(args); 1918 kfree(args);
1890 kfree(full_path); 1919 kfree(full_path);
@@ -1909,14 +1938,13 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
1909 cFYI(1, "setattr on file %s attrs->iavalid 0x%x", 1938 cFYI(1, "setattr on file %s attrs->iavalid 0x%x",
1910 direntry->d_name.name, attrs->ia_valid); 1939 direntry->d_name.name, attrs->ia_valid);
1911 1940
1912 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) { 1941 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
1913 /* check if we have permission to change attrs */ 1942 attrs->ia_valid |= ATTR_FORCE;
1914 rc = inode_change_ok(inode, attrs); 1943
1915 if (rc < 0) { 1944 rc = inode_change_ok(inode, attrs);
1916 FreeXid(xid); 1945 if (rc < 0) {
1917 return rc; 1946 FreeXid(xid);
1918 } else 1947 return rc;
1919 rc = 0;
1920 } 1948 }
1921 1949
1922 full_path = build_path_from_dentry(direntry); 1950 full_path = build_path_from_dentry(direntry);
@@ -2024,8 +2052,17 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
2024 2052
2025 /* do not need local check to inode_check_ok since the server does 2053 /* do not need local check to inode_check_ok since the server does
2026 that */ 2054 that */
2027 if (!rc) 2055 if (rc)
2028 rc = inode_setattr(inode, attrs); 2056 goto cifs_setattr_exit;
2057
2058 if ((attrs->ia_valid & ATTR_SIZE) &&
2059 attrs->ia_size != i_size_read(inode))
2060 truncate_setsize(inode, attrs->ia_size);
2061
2062 setattr_copy(inode, attrs);
2063 mark_inode_dirty(inode);
2064 return 0;
2065
2029cifs_setattr_exit: 2066cifs_setattr_exit:
2030 kfree(full_path); 2067 kfree(full_path);
2031 FreeXid(xid); 2068 FreeXid(xid);
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 505926f1ee6b..9d38a71c8e14 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -41,8 +41,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
41 __u64 ExtAttrMask = 0; 41 __u64 ExtAttrMask = 0;
42 __u64 caps; 42 __u64 caps;
43 struct cifsTconInfo *tcon; 43 struct cifsTconInfo *tcon;
44 struct cifsFileInfo *pSMBFile = 44 struct cifsFileInfo *pSMBFile = filep->private_data;
45 (struct cifsFileInfo *)filep->private_data;
46#endif /* CONFIG_CIFS_POSIX */ 45#endif /* CONFIG_CIFS_POSIX */
47 46
48 xid = GetXid(); 47 xid = GetXid();
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 1394aa37f26c..3ccadc1326d6 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -498,7 +498,6 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
498 struct cifsTconInfo *tcon; 498 struct cifsTconInfo *tcon;
499 struct cifsInodeInfo *pCifsInode; 499 struct cifsInodeInfo *pCifsInode;
500 struct cifsFileInfo *netfile; 500 struct cifsFileInfo *netfile;
501 int rc;
502 501
503 cFYI(1, "Checking for oplock break or dnotify response"); 502 cFYI(1, "Checking for oplock break or dnotify response");
504 if ((pSMB->hdr.Command == SMB_COM_NT_TRANSACT) && 503 if ((pSMB->hdr.Command == SMB_COM_NT_TRANSACT) &&
@@ -583,13 +582,18 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
583 pCifsInode->clientCanCacheAll = false; 582 pCifsInode->clientCanCacheAll = false;
584 if (pSMB->OplockLevel == 0) 583 if (pSMB->OplockLevel == 0)
585 pCifsInode->clientCanCacheRead = false; 584 pCifsInode->clientCanCacheRead = false;
586 rc = slow_work_enqueue(&netfile->oplock_break); 585
587 if (rc) { 586 /*
588 cERROR(1, "failed to enqueue oplock " 587 * cifs_oplock_break_put() can't be called
589 "break: %d\n", rc); 588 * from here. Get reference after queueing
590 } else { 589 * succeeded. cifs_oplock_break() will
591 netfile->oplock_break_cancelled = false; 590 * synchronize using GlobalSMBSeslock.
592 } 591 */
592 if (queue_work(system_nrt_wq,
593 &netfile->oplock_break))
594 cifs_oplock_break_get(netfile);
595 netfile->oplock_break_cancelled = false;
596
593 read_unlock(&GlobalSMBSeslock); 597 read_unlock(&GlobalSMBSeslock);
594 read_unlock(&cifs_tcp_ses_lock); 598 read_unlock(&cifs_tcp_ses_lock);
595 return true; 599 return true;
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index d35d52889cb5..f97851119e6c 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -61,6 +61,7 @@ static const struct smb_to_posix_error mapping_table_ERRDOS[] = {
61 {ERRremcd, -EACCES}, 61 {ERRremcd, -EACCES},
62 {ERRdiffdevice, -EXDEV}, 62 {ERRdiffdevice, -EXDEV},
63 {ERRnofiles, -ENOENT}, 63 {ERRnofiles, -ENOENT},
64 {ERRwriteprot, -EROFS},
64 {ERRbadshare, -ETXTBSY}, 65 {ERRbadshare, -ETXTBSY},
65 {ERRlock, -EACCES}, 66 {ERRlock, -EACCES},
66 {ERRunsup, -EINVAL}, 67 {ERRunsup, -EINVAL},
@@ -139,17 +140,18 @@ static const struct smb_to_posix_error mapping_table_ERRHRD[] = {
139 * Returns 0 on failure. 140 * Returns 0 on failure.
140 */ 141 */
141static int 142static int
142cifs_inet_pton(const int address_family, const char *cp, void *dst) 143cifs_inet_pton(const int address_family, const char *cp, int len, void *dst)
143{ 144{
144 int ret = 0; 145 int ret = 0;
145 146
146 /* calculate length by finding first slash or NULL */ 147 /* calculate length by finding first slash or NULL */
147 if (address_family == AF_INET) 148 if (address_family == AF_INET)
148 ret = in4_pton(cp, -1 /* len */, dst, '\\', NULL); 149 ret = in4_pton(cp, len, dst, '\\', NULL);
149 else if (address_family == AF_INET6) 150 else if (address_family == AF_INET6)
150 ret = in6_pton(cp, -1 /* len */, dst , '\\', NULL); 151 ret = in6_pton(cp, len, dst , '\\', NULL);
151 152
152 cFYI(DBG2, "address conversion returned %d for %s", ret, cp); 153 cFYI(DBG2, "address conversion returned %d for %*.*s",
154 ret, len, len, cp);
153 if (ret > 0) 155 if (ret > 0)
154 ret = 1; 156 ret = 1;
155 return ret; 157 return ret;
@@ -164,43 +166,66 @@ cifs_inet_pton(const int address_family, const char *cp, void *dst)
164 * Returns 0 on failure. 166 * Returns 0 on failure.
165 */ 167 */
166int 168int
167cifs_convert_address(char *src, void *dst) 169cifs_convert_address(struct sockaddr *dst, const char *src, int len)
168{ 170{
169 int rc; 171 int rc, alen, slen;
170 char *pct, *endp; 172 const char *pct;
173 char *endp, scope_id[13];
171 struct sockaddr_in *s4 = (struct sockaddr_in *) dst; 174 struct sockaddr_in *s4 = (struct sockaddr_in *) dst;
172 struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) dst; 175 struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) dst;
173 176
174 /* IPv4 address */ 177 /* IPv4 address */
175 if (cifs_inet_pton(AF_INET, src, &s4->sin_addr.s_addr)) { 178 if (cifs_inet_pton(AF_INET, src, len, &s4->sin_addr.s_addr)) {
176 s4->sin_family = AF_INET; 179 s4->sin_family = AF_INET;
177 return 1; 180 return 1;
178 } 181 }
179 182
180 /* temporarily terminate string */ 183 /* attempt to exclude the scope ID from the address part */
181 pct = strchr(src, '%'); 184 pct = memchr(src, '%', len);
182 if (pct) 185 alen = pct ? pct - src : len;
183 *pct = '\0';
184
185 rc = cifs_inet_pton(AF_INET6, src, &s6->sin6_addr.s6_addr);
186
187 /* repair temp termination (if any) and make pct point to scopeid */
188 if (pct)
189 *pct++ = '%';
190 186
187 rc = cifs_inet_pton(AF_INET6, src, alen, &s6->sin6_addr.s6_addr);
191 if (!rc) 188 if (!rc)
192 return rc; 189 return rc;
193 190
194 s6->sin6_family = AF_INET6; 191 s6->sin6_family = AF_INET6;
195 if (pct) { 192 if (pct) {
193 /* grab the scope ID */
194 slen = len - (alen + 1);
195 if (slen <= 0 || slen > 12)
196 return 0;
197 memcpy(scope_id, pct + 1, slen);
198 scope_id[slen] = '\0';
199
196 s6->sin6_scope_id = (u32) simple_strtoul(pct, &endp, 0); 200 s6->sin6_scope_id = (u32) simple_strtoul(pct, &endp, 0);
197 if (!*pct || *endp) 201 if (endp != scope_id + slen)
198 return 0; 202 return 0;
199 } 203 }
200 204
201 return rc; 205 return rc;
202} 206}
203 207
208int
209cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len,
210 const unsigned short int port)
211{
212 if (!cifs_convert_address(dst, src, len))
213 return 0;
214
215 switch (dst->sa_family) {
216 case AF_INET:
217 ((struct sockaddr_in *)dst)->sin_port = htons(port);
218 break;
219 case AF_INET6:
220 ((struct sockaddr_in6 *)dst)->sin6_port = htons(port);
221 break;
222 default:
223 return 0;
224 }
225
226 return 1;
227}
228
204/***************************************************************************** 229/*****************************************************************************
205convert a NT status code to a dos class/code 230convert a NT status code to a dos class/code
206 *****************************************************************************/ 231 *****************************************************************************/
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index daf1753af674..d5e591fab475 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -847,6 +847,11 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
847 end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len; 847 end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len;
848 848
849 tmp_buf = kmalloc(UNICODE_NAME_MAX, GFP_KERNEL); 849 tmp_buf = kmalloc(UNICODE_NAME_MAX, GFP_KERNEL);
850 if (tmp_buf == NULL) {
851 rc = -ENOMEM;
852 break;
853 }
854
850 for (i = 0; (i < num_to_fill) && (rc == 0); i++) { 855 for (i = 0; (i < num_to_fill) && (rc == 0); i++) {
851 if (current_entry == NULL) { 856 if (current_entry == NULL) {
852 /* evaluate whether this case is an error */ 857 /* evaluate whether this case is an error */
diff --git a/fs/cifs/smberr.h b/fs/cifs/smberr.h
index c5084d27db7c..7f16cb825fe5 100644
--- a/fs/cifs/smberr.h
+++ b/fs/cifs/smberr.h
@@ -76,6 +76,7 @@
76#define ERRnofiles 18 /* A File Search command can find no 76#define ERRnofiles 18 /* A File Search command can find no
77 more files matching the specified 77 more files matching the specified
78 criteria. */ 78 criteria. */
79#define ERRwriteprot 19 /* media is write protected */
79#define ERRgeneral 31 80#define ERRgeneral 31
80#define ERRbadshare 32 /* The sharing mode specified for an 81#define ERRbadshare 32 /* The sharing mode specified for an
81 Open conflicts with existing FIDs on 82 Open conflicts with existing FIDs on
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index d97f9935a028..6526e6f21ecf 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -35,7 +35,7 @@
35#include "coda_int.h" 35#include "coda_int.h"
36 36
37/* VFS super_block ops */ 37/* VFS super_block ops */
38static void coda_clear_inode(struct inode *); 38static void coda_evict_inode(struct inode *);
39static void coda_put_super(struct super_block *); 39static void coda_put_super(struct super_block *);
40static int coda_statfs(struct dentry *dentry, struct kstatfs *buf); 40static int coda_statfs(struct dentry *dentry, struct kstatfs *buf);
41 41
@@ -93,7 +93,7 @@ static const struct super_operations coda_super_operations =
93{ 93{
94 .alloc_inode = coda_alloc_inode, 94 .alloc_inode = coda_alloc_inode,
95 .destroy_inode = coda_destroy_inode, 95 .destroy_inode = coda_destroy_inode,
96 .clear_inode = coda_clear_inode, 96 .evict_inode = coda_evict_inode,
97 .put_super = coda_put_super, 97 .put_super = coda_put_super,
98 .statfs = coda_statfs, 98 .statfs = coda_statfs,
99 .remount_fs = coda_remount, 99 .remount_fs = coda_remount,
@@ -224,8 +224,10 @@ static void coda_put_super(struct super_block *sb)
224 printk("Coda: Bye bye.\n"); 224 printk("Coda: Bye bye.\n");
225} 225}
226 226
227static void coda_clear_inode(struct inode *inode) 227static void coda_evict_inode(struct inode *inode)
228{ 228{
229 truncate_inode_pages(&inode->i_data, 0);
230 end_writeback(inode);
229 coda_cache_clear_inode(inode); 231 coda_cache_clear_inode(inode);
230} 232}
231 233
diff --git a/fs/compat.c b/fs/compat.c
index ce02278b9c83..e6d5d70cf3cf 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -8,13 +8,14 @@
8 * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com) 8 * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com)
9 * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) 9 * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be)
10 * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs 10 * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs
11 * Copyright (C) 2003 Pavel Machek (pavel@suse.cz) 11 * Copyright (C) 2003 Pavel Machek (pavel@ucw.cz)
12 * 12 *
13 * This program is free software; you can redistribute it and/or modify 13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License version 2 as 14 * it under the terms of the GNU General Public License version 2 as
15 * published by the Free Software Foundation. 15 * published by the Free Software Foundation.
16 */ 16 */
17 17
18#include <linux/stddef.h>
18#include <linux/kernel.h> 19#include <linux/kernel.h>
19#include <linux/linkage.h> 20#include <linux/linkage.h>
20#include <linux/compat.h> 21#include <linux/compat.h>
@@ -266,7 +267,7 @@ asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_sta
266 error = user_path(pathname, &path); 267 error = user_path(pathname, &path);
267 if (!error) { 268 if (!error) {
268 struct kstatfs tmp; 269 struct kstatfs tmp;
269 error = vfs_statfs(path.dentry, &tmp); 270 error = vfs_statfs(&path, &tmp);
270 if (!error) 271 if (!error)
271 error = put_compat_statfs(buf, &tmp); 272 error = put_compat_statfs(buf, &tmp);
272 path_put(&path); 273 path_put(&path);
@@ -284,7 +285,7 @@ asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user
284 file = fget(fd); 285 file = fget(fd);
285 if (!file) 286 if (!file)
286 goto out; 287 goto out;
287 error = vfs_statfs(file->f_path.dentry, &tmp); 288 error = vfs_statfs(&file->f_path, &tmp);
288 if (!error) 289 if (!error)
289 error = put_compat_statfs(buf, &tmp); 290 error = put_compat_statfs(buf, &tmp);
290 fput(file); 291 fput(file);
@@ -334,7 +335,7 @@ asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t s
334 error = user_path(pathname, &path); 335 error = user_path(pathname, &path);
335 if (!error) { 336 if (!error) {
336 struct kstatfs tmp; 337 struct kstatfs tmp;
337 error = vfs_statfs(path.dentry, &tmp); 338 error = vfs_statfs(&path, &tmp);
338 if (!error) 339 if (!error)
339 error = put_compat_statfs64(buf, &tmp); 340 error = put_compat_statfs64(buf, &tmp);
340 path_put(&path); 341 path_put(&path);
@@ -355,7 +356,7 @@ asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct c
355 file = fget(fd); 356 file = fget(fd);
356 if (!file) 357 if (!file)
357 goto out; 358 goto out;
358 error = vfs_statfs(file->f_path.dentry, &tmp); 359 error = vfs_statfs(&file->f_path, &tmp);
359 if (!error) 360 if (!error)
360 error = put_compat_statfs64(buf, &tmp); 361 error = put_compat_statfs64(buf, &tmp);
361 fput(file); 362 fput(file);
@@ -378,7 +379,7 @@ asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u)
378 sb = user_get_super(new_decode_dev(dev)); 379 sb = user_get_super(new_decode_dev(dev));
379 if (!sb) 380 if (!sb)
380 return -EINVAL; 381 return -EINVAL;
381 err = vfs_statfs(sb->s_root, &sbuf); 382 err = statfs_by_dentry(sb->s_root, &sbuf);
382 drop_super(sb); 383 drop_super(sb);
383 if (err) 384 if (err)
384 return err; 385 return err;
@@ -891,8 +892,6 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name,
891 return retval; 892 return retval;
892} 893}
893 894
894#define NAME_OFFSET(de) ((int) ((de)->d_name - (char __user *) (de)))
895
896struct compat_old_linux_dirent { 895struct compat_old_linux_dirent {
897 compat_ulong_t d_ino; 896 compat_ulong_t d_ino;
898 compat_ulong_t d_offset; 897 compat_ulong_t d_offset;
@@ -981,7 +980,8 @@ static int compat_filldir(void *__buf, const char *name, int namlen,
981 struct compat_linux_dirent __user * dirent; 980 struct compat_linux_dirent __user * dirent;
982 struct compat_getdents_callback *buf = __buf; 981 struct compat_getdents_callback *buf = __buf;
983 compat_ulong_t d_ino; 982 compat_ulong_t d_ino;
984 int reclen = ALIGN(NAME_OFFSET(dirent) + namlen + 2, sizeof(compat_long_t)); 983 int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) +
984 namlen + 2, sizeof(compat_long_t));
985 985
986 buf->error = -EINVAL; /* only used if we fail.. */ 986 buf->error = -EINVAL; /* only used if we fail.. */
987 if (reclen > buf->count) 987 if (reclen > buf->count)
@@ -1068,8 +1068,8 @@ static int compat_filldir64(void * __buf, const char * name, int namlen, loff_t
1068{ 1068{
1069 struct linux_dirent64 __user *dirent; 1069 struct linux_dirent64 __user *dirent;
1070 struct compat_getdents_callback64 *buf = __buf; 1070 struct compat_getdents_callback64 *buf = __buf;
1071 int jj = NAME_OFFSET(dirent); 1071 int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1,
1072 int reclen = ALIGN(jj + namlen + 1, sizeof(u64)); 1072 sizeof(u64));
1073 u64 off; 1073 u64 off;
1074 1074
1075 buf->error = -EINVAL; /* only used if we fail.. */ 1075 buf->error = -EINVAL; /* only used if we fail.. */
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 641640dc7ae5..63ae85831464 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -4,7 +4,7 @@
4 * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com) 4 * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com)
5 * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) 5 * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be)
6 * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs 6 * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs
7 * Copyright (C) 2003 Pavel Machek (pavel@suse.cz) 7 * Copyright (C) 2003 Pavel Machek (pavel@ucw.cz)
8 * 8 *
9 * These routines maintain argument size conversion between 32bit and 64bit 9 * These routines maintain argument size conversion between 32bit and 64bit
10 * ioctls. 10 * ioctls.
@@ -601,8 +601,11 @@ static int ioc_settimeout(unsigned int fd, unsigned int cmd,
601} 601}
602 602
603/* Bluetooth ioctls */ 603/* Bluetooth ioctls */
604#define HCIUARTSETPROTO _IOW('U', 200, int) 604#define HCIUARTSETPROTO _IOW('U', 200, int)
605#define HCIUARTGETPROTO _IOR('U', 201, int) 605#define HCIUARTGETPROTO _IOR('U', 201, int)
606#define HCIUARTGETDEVICE _IOR('U', 202, int)
607#define HCIUARTSETFLAGS _IOW('U', 203, int)
608#define HCIUARTGETFLAGS _IOR('U', 204, int)
606 609
607#define BNEPCONNADD _IOW('B', 200, int) 610#define BNEPCONNADD _IOW('B', 200, int)
608#define BNEPCONNDEL _IOW('B', 201, int) 611#define BNEPCONNDEL _IOW('B', 201, int)
@@ -1328,6 +1331,8 @@ COMPATIBLE_IOCTL(HCISETLINKPOL)
1328COMPATIBLE_IOCTL(HCISETLINKMODE) 1331COMPATIBLE_IOCTL(HCISETLINKMODE)
1329COMPATIBLE_IOCTL(HCISETACLMTU) 1332COMPATIBLE_IOCTL(HCISETACLMTU)
1330COMPATIBLE_IOCTL(HCISETSCOMTU) 1333COMPATIBLE_IOCTL(HCISETSCOMTU)
1334COMPATIBLE_IOCTL(HCIBLOCKADDR)
1335COMPATIBLE_IOCTL(HCIUNBLOCKADDR)
1331COMPATIBLE_IOCTL(HCIINQUIRY) 1336COMPATIBLE_IOCTL(HCIINQUIRY)
1332COMPATIBLE_IOCTL(HCIUARTSETPROTO) 1337COMPATIBLE_IOCTL(HCIUARTSETPROTO)
1333COMPATIBLE_IOCTL(HCIUARTGETPROTO) 1338COMPATIBLE_IOCTL(HCIUARTGETPROTO)
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index dd3634e4c967..a53b130b366c 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -39,66 +39,55 @@ static DEFINE_MUTEX(read_mutex);
39#define CRAMINO(x) (((x)->offset && (x)->size)?(x)->offset<<2:1) 39#define CRAMINO(x) (((x)->offset && (x)->size)?(x)->offset<<2:1)
40#define OFFSET(x) ((x)->i_ino) 40#define OFFSET(x) ((x)->i_ino)
41 41
42 42static void setup_inode(struct inode *inode, struct cramfs_inode * cramfs_inode)
43static int cramfs_iget5_test(struct inode *inode, void *opaque)
44{
45 struct cramfs_inode *cramfs_inode = opaque;
46 return inode->i_ino == CRAMINO(cramfs_inode) && inode->i_ino != 1;
47}
48
49static int cramfs_iget5_set(struct inode *inode, void *opaque)
50{ 43{
51 struct cramfs_inode *cramfs_inode = opaque; 44 static struct timespec zerotime;
52 inode->i_ino = CRAMINO(cramfs_inode); 45 inode->i_mode = cramfs_inode->mode;
53 return 0; 46 inode->i_uid = cramfs_inode->uid;
47 inode->i_size = cramfs_inode->size;
48 inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1;
49 inode->i_gid = cramfs_inode->gid;
50 /* Struct copy intentional */
51 inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime;
52 /* inode->i_nlink is left 1 - arguably wrong for directories,
53 but it's the best we can do without reading the directory
54 contents. 1 yields the right result in GNU find, even
55 without -noleaf option. */
56 if (S_ISREG(inode->i_mode)) {
57 inode->i_fop = &generic_ro_fops;
58 inode->i_data.a_ops = &cramfs_aops;
59 } else if (S_ISDIR(inode->i_mode)) {
60 inode->i_op = &cramfs_dir_inode_operations;
61 inode->i_fop = &cramfs_directory_operations;
62 } else if (S_ISLNK(inode->i_mode)) {
63 inode->i_op = &page_symlink_inode_operations;
64 inode->i_data.a_ops = &cramfs_aops;
65 } else {
66 init_special_inode(inode, inode->i_mode,
67 old_decode_dev(cramfs_inode->size));
68 }
54} 69}
55 70
56static struct inode *get_cramfs_inode(struct super_block *sb, 71static struct inode *get_cramfs_inode(struct super_block *sb,
57 struct cramfs_inode * cramfs_inode) 72 struct cramfs_inode * cramfs_inode)
58{ 73{
59 struct inode *inode = iget5_locked(sb, CRAMINO(cramfs_inode), 74 struct inode *inode;
60 cramfs_iget5_test, cramfs_iget5_set, 75 if (CRAMINO(cramfs_inode) == 1) {
61 cramfs_inode); 76 inode = new_inode(sb);
62 static struct timespec zerotime; 77 if (inode) {
63 78 inode->i_ino = 1;
64 if (inode && (inode->i_state & I_NEW)) { 79 setup_inode(inode, cramfs_inode);
65 inode->i_mode = cramfs_inode->mode; 80 }
66 inode->i_uid = cramfs_inode->uid; 81 } else {
67 inode->i_size = cramfs_inode->size; 82 inode = iget_locked(sb, CRAMINO(cramfs_inode));
68 inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1; 83 if (inode) {
69 inode->i_gid = cramfs_inode->gid; 84 setup_inode(inode, cramfs_inode);
70 /* Struct copy intentional */ 85 unlock_new_inode(inode);
71 inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime;
72 /* inode->i_nlink is left 1 - arguably wrong for directories,
73 but it's the best we can do without reading the directory
74 contents. 1 yields the right result in GNU find, even
75 without -noleaf option. */
76 if (S_ISREG(inode->i_mode)) {
77 inode->i_fop = &generic_ro_fops;
78 inode->i_data.a_ops = &cramfs_aops;
79 } else if (S_ISDIR(inode->i_mode)) {
80 inode->i_op = &cramfs_dir_inode_operations;
81 inode->i_fop = &cramfs_directory_operations;
82 } else if (S_ISLNK(inode->i_mode)) {
83 inode->i_op = &page_symlink_inode_operations;
84 inode->i_data.a_ops = &cramfs_aops;
85 } else {
86 init_special_inode(inode, inode->i_mode,
87 old_decode_dev(cramfs_inode->size));
88 } 86 }
89 unlock_new_inode(inode);
90 } 87 }
91 return inode; 88 return inode;
92} 89}
93 90
94static void cramfs_drop_inode(struct inode *inode)
95{
96 if (inode->i_ino == 1)
97 generic_delete_inode(inode);
98 else
99 generic_drop_inode(inode);
100}
101
102/* 91/*
103 * We have our own block cache: don't fill up the buffer cache 92 * We have our own block cache: don't fill up the buffer cache
104 * with the rom-image, because the way the filesystem is set 93 * with the rom-image, because the way the filesystem is set
@@ -542,7 +531,6 @@ static const struct super_operations cramfs_ops = {
542 .put_super = cramfs_put_super, 531 .put_super = cramfs_put_super,
543 .remount_fs = cramfs_remount, 532 .remount_fs = cramfs_remount,
544 .statfs = cramfs_statfs, 533 .statfs = cramfs_statfs,
545 .drop_inode = cramfs_drop_inode,
546}; 534};
547 535
548static int cramfs_get_sb(struct file_system_type *fs_type, 536static int cramfs_get_sb(struct file_system_type *fs_type,
diff --git a/fs/dcache.c b/fs/dcache.c
index 86d4db15473e..9f2c13417969 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -536,7 +536,7 @@ restart:
536 */ 536 */
537static void prune_dcache(int count) 537static void prune_dcache(int count)
538{ 538{
539 struct super_block *sb, *n; 539 struct super_block *sb, *p = NULL;
540 int w_count; 540 int w_count;
541 int unused = dentry_stat.nr_unused; 541 int unused = dentry_stat.nr_unused;
542 int prune_ratio; 542 int prune_ratio;
@@ -550,7 +550,7 @@ static void prune_dcache(int count)
550 else 550 else
551 prune_ratio = unused / count; 551 prune_ratio = unused / count;
552 spin_lock(&sb_lock); 552 spin_lock(&sb_lock);
553 list_for_each_entry_safe(sb, n, &super_blocks, s_list) { 553 list_for_each_entry(sb, &super_blocks, s_list) {
554 if (list_empty(&sb->s_instances)) 554 if (list_empty(&sb->s_instances))
555 continue; 555 continue;
556 if (sb->s_nr_dentry_unused == 0) 556 if (sb->s_nr_dentry_unused == 0)
@@ -590,14 +590,16 @@ static void prune_dcache(int count)
590 up_read(&sb->s_umount); 590 up_read(&sb->s_umount);
591 } 591 }
592 spin_lock(&sb_lock); 592 spin_lock(&sb_lock);
593 /* lock was dropped, must reset next */ 593 if (p)
594 list_safe_reset_next(sb, n, s_list); 594 __put_super(p);
595 count -= pruned; 595 count -= pruned;
596 __put_super(sb); 596 p = sb;
597 /* more work left to do? */ 597 /* more work left to do? */
598 if (count <= 0) 598 if (count <= 0)
599 break; 599 break;
600 } 600 }
601 if (p)
602 __put_super(p);
601 spin_unlock(&sb_lock); 603 spin_unlock(&sb_lock);
602 spin_unlock(&dcache_lock); 604 spin_unlock(&dcache_lock);
603} 605}
@@ -2049,16 +2051,12 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
2049/* 2051/*
2050 * Write full pathname from the root of the filesystem into the buffer. 2052 * Write full pathname from the root of the filesystem into the buffer.
2051 */ 2053 */
2052char *dentry_path(struct dentry *dentry, char *buf, int buflen) 2054char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2053{ 2055{
2054 char *end = buf + buflen; 2056 char *end = buf + buflen;
2055 char *retval; 2057 char *retval;
2056 2058
2057 spin_lock(&dcache_lock);
2058 prepend(&end, &buflen, "\0", 1); 2059 prepend(&end, &buflen, "\0", 1);
2059 if (d_unlinked(dentry) &&
2060 (prepend(&end, &buflen, "//deleted", 9) != 0))
2061 goto Elong;
2062 if (buflen < 1) 2060 if (buflen < 1)
2063 goto Elong; 2061 goto Elong;
2064 /* Get '/' right */ 2062 /* Get '/' right */
@@ -2076,7 +2074,28 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2076 retval = end; 2074 retval = end;
2077 dentry = parent; 2075 dentry = parent;
2078 } 2076 }
2077 return retval;
2078Elong:
2079 return ERR_PTR(-ENAMETOOLONG);
2080}
2081EXPORT_SYMBOL(__dentry_path);
2082
2083char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2084{
2085 char *p = NULL;
2086 char *retval;
2087
2088 spin_lock(&dcache_lock);
2089 if (d_unlinked(dentry)) {
2090 p = buf + buflen;
2091 if (prepend(&p, &buflen, "//deleted", 10) != 0)
2092 goto Elong;
2093 buflen++;
2094 }
2095 retval = __dentry_path(dentry, buf, buflen);
2079 spin_unlock(&dcache_lock); 2096 spin_unlock(&dcache_lock);
2097 if (!IS_ERR(retval) && p)
2098 *p = '/'; /* restore '/' overwritten with '\0' */
2080 return retval; 2099 return retval;
2081Elong: 2100Elong:
2082 spin_unlock(&dcache_lock); 2101 spin_unlock(&dcache_lock);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 7600aacf531d..51f270b479b6 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -218,7 +218,7 @@ static struct page *dio_get_page(struct dio *dio)
218 * filesystems can use it to hold additional state between get_block calls and 218 * filesystems can use it to hold additional state between get_block calls and
219 * dio_complete. 219 * dio_complete.
220 */ 220 */
221static int dio_complete(struct dio *dio, loff_t offset, int ret) 221static int dio_complete(struct dio *dio, loff_t offset, int ret, bool is_async)
222{ 222{
223 ssize_t transferred = 0; 223 ssize_t transferred = 0;
224 224
@@ -239,14 +239,6 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
239 transferred = dio->i_size - offset; 239 transferred = dio->i_size - offset;
240 } 240 }
241 241
242 if (dio->end_io && dio->result)
243 dio->end_io(dio->iocb, offset, transferred,
244 dio->map_bh.b_private);
245
246 if (dio->flags & DIO_LOCKING)
247 /* lockdep: non-owner release */
248 up_read_non_owner(&dio->inode->i_alloc_sem);
249
250 if (ret == 0) 242 if (ret == 0)
251 ret = dio->page_errors; 243 ret = dio->page_errors;
252 if (ret == 0) 244 if (ret == 0)
@@ -254,6 +246,17 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
254 if (ret == 0) 246 if (ret == 0)
255 ret = transferred; 247 ret = transferred;
256 248
249 if (dio->end_io && dio->result) {
250 dio->end_io(dio->iocb, offset, transferred,
251 dio->map_bh.b_private, ret, is_async);
252 } else if (is_async) {
253 aio_complete(dio->iocb, ret, 0);
254 }
255
256 if (dio->flags & DIO_LOCKING)
257 /* lockdep: non-owner release */
258 up_read_non_owner(&dio->inode->i_alloc_sem);
259
257 return ret; 260 return ret;
258} 261}
259 262
@@ -277,8 +280,7 @@ static void dio_bio_end_aio(struct bio *bio, int error)
277 spin_unlock_irqrestore(&dio->bio_lock, flags); 280 spin_unlock_irqrestore(&dio->bio_lock, flags);
278 281
279 if (remaining == 0) { 282 if (remaining == 0) {
280 int ret = dio_complete(dio, dio->iocb->ki_pos, 0); 283 dio_complete(dio, dio->iocb->ki_pos, 0, true);
281 aio_complete(dio->iocb, ret, 0);
282 kfree(dio); 284 kfree(dio);
283 } 285 }
284} 286}
@@ -1126,7 +1128,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1126 spin_unlock_irqrestore(&dio->bio_lock, flags); 1128 spin_unlock_irqrestore(&dio->bio_lock, flags);
1127 1129
1128 if (ret2 == 0) { 1130 if (ret2 == 0) {
1129 ret = dio_complete(dio, offset, ret); 1131 ret = dio_complete(dio, offset, ret, false);
1130 kfree(dio); 1132 kfree(dio);
1131 } else 1133 } else
1132 BUG_ON(ret != -EIOCBQUEUED); 1134 BUG_ON(ret != -EIOCBQUEUED);
@@ -1134,8 +1136,27 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1134 return ret; 1136 return ret;
1135} 1137}
1136 1138
1139/*
1140 * This is a library function for use by filesystem drivers.
1141 *
1142 * The locking rules are governed by the flags parameter:
1143 * - if the flags value contains DIO_LOCKING we use a fancy locking
1144 * scheme for dumb filesystems.
1145 * For writes this function is called under i_mutex and returns with
1146 * i_mutex held, for reads, i_mutex is not held on entry, but it is
1147 * taken and dropped again before returning.
1148 * For reads and writes i_alloc_sem is taken in shared mode and released
1149 * on I/O completion (which may happen asynchronously after returning to
1150 * the caller).
1151 *
1152 * - if the flags value does NOT contain DIO_LOCKING we don't use any
1153 * internal locking but rather rely on the filesystem to synchronize
1154 * direct I/O reads/writes versus each other and truncate.
1155 * For reads and writes both i_mutex and i_alloc_sem are not held on
1156 * entry and are never taken.
1157 */
1137ssize_t 1158ssize_t
1138__blockdev_direct_IO_newtrunc(int rw, struct kiocb *iocb, struct inode *inode, 1159__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1139 struct block_device *bdev, const struct iovec *iov, loff_t offset, 1160 struct block_device *bdev, const struct iovec *iov, loff_t offset,
1140 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, 1161 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
1141 dio_submit_t submit_io, int flags) 1162 dio_submit_t submit_io, int flags)
@@ -1231,57 +1252,4 @@ __blockdev_direct_IO_newtrunc(int rw, struct kiocb *iocb, struct inode *inode,
1231out: 1252out:
1232 return retval; 1253 return retval;
1233} 1254}
1234EXPORT_SYMBOL(__blockdev_direct_IO_newtrunc);
1235
1236/*
1237 * This is a library function for use by filesystem drivers.
1238 *
1239 * The locking rules are governed by the flags parameter:
1240 * - if the flags value contains DIO_LOCKING we use a fancy locking
1241 * scheme for dumb filesystems.
1242 * For writes this function is called under i_mutex and returns with
1243 * i_mutex held, for reads, i_mutex is not held on entry, but it is
1244 * taken and dropped again before returning.
1245 * For reads and writes i_alloc_sem is taken in shared mode and released
1246 * on I/O completion (which may happen asynchronously after returning to
1247 * the caller).
1248 *
1249 * - if the flags value does NOT contain DIO_LOCKING we don't use any
1250 * internal locking but rather rely on the filesystem to synchronize
1251 * direct I/O reads/writes versus each other and truncate.
1252 * For reads and writes both i_mutex and i_alloc_sem are not held on
1253 * entry and are never taken.
1254 */
1255ssize_t
1256__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1257 struct block_device *bdev, const struct iovec *iov, loff_t offset,
1258 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
1259 dio_submit_t submit_io, int flags)
1260{
1261 ssize_t retval;
1262
1263 retval = __blockdev_direct_IO_newtrunc(rw, iocb, inode, bdev, iov,
1264 offset, nr_segs, get_block, end_io, submit_io, flags);
1265 /*
1266 * In case of error extending write may have instantiated a few
1267 * blocks outside i_size. Trim these off again for DIO_LOCKING.
1268 * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this in
1269 * their own manner. This is a further example of where the old
1270 * truncate sequence is inadequate.
1271 *
1272 * NOTE: filesystems with their own locking have to handle this
1273 * on their own.
1274 */
1275 if (flags & DIO_LOCKING) {
1276 if (unlikely((rw & WRITE) && retval < 0)) {
1277 loff_t isize = i_size_read(inode);
1278 loff_t end = offset + iov_length(iov, nr_segs);
1279
1280 if (end > isize)
1281 vmtruncate(inode, isize);
1282 }
1283 }
1284
1285 return retval;
1286}
1287EXPORT_SYMBOL(__blockdev_direct_IO); 1255EXPORT_SYMBOL(__blockdev_direct_IO);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index c0d35c620526..37a34c2c622a 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -248,7 +248,7 @@ static struct connection *assoc2con(int assoc_id)
248 248
249 for (i = 0 ; i < CONN_HASH_SIZE; i++) { 249 for (i = 0 ; i < CONN_HASH_SIZE; i++) {
250 hlist_for_each_entry(con, h, &connection_hash[i], list) { 250 hlist_for_each_entry(con, h, &connection_hash[i], list) {
251 if (con && con->sctp_assoc == assoc_id) { 251 if (con->sctp_assoc == assoc_id) {
252 mutex_unlock(&connections_lock); 252 mutex_unlock(&connections_lock);
253 return con; 253 return con;
254 } 254 }
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
index 2c6ad518100d..ef17e0169da1 100644
--- a/fs/dlm/netlink.c
+++ b/fs/dlm/netlink.c
@@ -81,24 +81,11 @@ static struct genl_ops dlm_nl_ops = {
81 81
82int __init dlm_netlink_init(void) 82int __init dlm_netlink_init(void)
83{ 83{
84 int rv; 84 return genl_register_family_with_ops(&family, &dlm_nl_ops, 1);
85
86 rv = genl_register_family(&family);
87 if (rv)
88 return rv;
89
90 rv = genl_register_ops(&family, &dlm_nl_ops);
91 if (rv < 0)
92 goto err;
93 return 0;
94 err:
95 genl_unregister_family(&family);
96 return rv;
97} 85}
98 86
99void dlm_netlink_exit(void) 87void dlm_netlink_exit(void)
100{ 88{
101 genl_unregister_ops(&family, &dlm_nl_ops);
102 genl_unregister_family(&family); 89 genl_unregister_family(&family);
103} 90}
104 91
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 83c4f600786a..2195c213ab2f 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -18,7 +18,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
18 18
19 spin_lock(&inode_lock); 19 spin_lock(&inode_lock);
20 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 20 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
21 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) 21 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
22 continue; 22 continue;
23 if (inode->i_mapping->nrpages == 0) 23 if (inode->i_mapping->nrpages == 0)
24 continue; 24 continue;
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 1cc087635a5e..a2e3b562e65d 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -762,7 +762,7 @@ ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat,
762 762
763/** 763/**
764 * ecryptfs_init_crypt_ctx 764 * ecryptfs_init_crypt_ctx
765 * @crypt_stat: Uninitilized crypt stats structure 765 * @crypt_stat: Uninitialized crypt stats structure
766 * 766 *
767 * Initialize the crypto context. 767 * Initialize the crypto context.
768 * 768 *
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 31ef5252f0fe..82900b063b1e 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -804,10 +804,20 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
804 size_t num_zeros = (PAGE_CACHE_SIZE 804 size_t num_zeros = (PAGE_CACHE_SIZE
805 - (ia->ia_size & ~PAGE_CACHE_MASK)); 805 - (ia->ia_size & ~PAGE_CACHE_MASK));
806 806
807
808 /*
809 * XXX(truncate) this should really happen at the beginning
810 * of ->setattr. But the code is too messy to do that as part
811 * of a larger patch. ecryptfs is also totally missing out
812 * on the inode_change_ok check at the beginning of
813 * ->setattr which would include this.
814 */
815 rc = inode_newsize_ok(inode, ia->ia_size);
816 if (rc)
817 goto out;
818
807 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { 819 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
808 rc = simple_setsize(inode, ia->ia_size); 820 truncate_setsize(inode, ia->ia_size);
809 if (rc)
810 goto out;
811 lower_ia->ia_size = ia->ia_size; 821 lower_ia->ia_size = ia->ia_size;
812 lower_ia->ia_valid |= ATTR_SIZE; 822 lower_ia->ia_valid |= ATTR_SIZE;
813 goto out; 823 goto out;
@@ -830,7 +840,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
830 goto out; 840 goto out;
831 } 841 }
832 } 842 }
833 simple_setsize(inode, ia->ia_size); 843 truncate_setsize(inode, ia->ia_size);
834 rc = ecryptfs_write_inode_size_to_metadata(inode); 844 rc = ecryptfs_write_inode_size_to_metadata(inode);
835 if (rc) { 845 if (rc) {
836 printk(KERN_ERR "Problem with " 846 printk(KERN_ERR "Problem with "
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 2d8dbce9d485..46c4dd8dfcc3 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -31,9 +31,9 @@ static struct mutex ecryptfs_msg_ctx_lists_mux;
31 31
32static struct hlist_head *ecryptfs_daemon_hash; 32static struct hlist_head *ecryptfs_daemon_hash;
33struct mutex ecryptfs_daemon_hash_mux; 33struct mutex ecryptfs_daemon_hash_mux;
34static int ecryptfs_hash_buckets; 34static int ecryptfs_hash_bits;
35#define ecryptfs_uid_hash(uid) \ 35#define ecryptfs_uid_hash(uid) \
36 hash_long((unsigned long)uid, ecryptfs_hash_buckets) 36 hash_long((unsigned long)uid, ecryptfs_hash_bits)
37 37
38static u32 ecryptfs_msg_counter; 38static u32 ecryptfs_msg_counter;
39static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr; 39static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr;
@@ -486,18 +486,19 @@ int ecryptfs_init_messaging(void)
486 } 486 }
487 mutex_init(&ecryptfs_daemon_hash_mux); 487 mutex_init(&ecryptfs_daemon_hash_mux);
488 mutex_lock(&ecryptfs_daemon_hash_mux); 488 mutex_lock(&ecryptfs_daemon_hash_mux);
489 ecryptfs_hash_buckets = 1; 489 ecryptfs_hash_bits = 1;
490 while (ecryptfs_number_of_users >> ecryptfs_hash_buckets) 490 while (ecryptfs_number_of_users >> ecryptfs_hash_bits)
491 ecryptfs_hash_buckets++; 491 ecryptfs_hash_bits++;
492 ecryptfs_daemon_hash = kmalloc((sizeof(struct hlist_head) 492 ecryptfs_daemon_hash = kmalloc((sizeof(struct hlist_head)
493 * ecryptfs_hash_buckets), GFP_KERNEL); 493 * (1 << ecryptfs_hash_bits)),
494 GFP_KERNEL);
494 if (!ecryptfs_daemon_hash) { 495 if (!ecryptfs_daemon_hash) {
495 rc = -ENOMEM; 496 rc = -ENOMEM;
496 printk(KERN_ERR "%s: Failed to allocate memory\n", __func__); 497 printk(KERN_ERR "%s: Failed to allocate memory\n", __func__);
497 mutex_unlock(&ecryptfs_daemon_hash_mux); 498 mutex_unlock(&ecryptfs_daemon_hash_mux);
498 goto out; 499 goto out;
499 } 500 }
500 for (i = 0; i < ecryptfs_hash_buckets; i++) 501 for (i = 0; i < (1 << ecryptfs_hash_bits); i++)
501 INIT_HLIST_HEAD(&ecryptfs_daemon_hash[i]); 502 INIT_HLIST_HEAD(&ecryptfs_daemon_hash[i]);
502 mutex_unlock(&ecryptfs_daemon_hash_mux); 503 mutex_unlock(&ecryptfs_daemon_hash_mux);
503 ecryptfs_msg_ctx_arr = kmalloc((sizeof(struct ecryptfs_msg_ctx) 504 ecryptfs_msg_ctx_arr = kmalloc((sizeof(struct ecryptfs_msg_ctx)
@@ -554,7 +555,7 @@ void ecryptfs_release_messaging(void)
554 int i; 555 int i;
555 556
556 mutex_lock(&ecryptfs_daemon_hash_mux); 557 mutex_lock(&ecryptfs_daemon_hash_mux);
557 for (i = 0; i < ecryptfs_hash_buckets; i++) { 558 for (i = 0; i < (1 << ecryptfs_hash_bits); i++) {
558 int rc; 559 int rc;
559 560
560 hlist_for_each_entry(daemon, elem, 561 hlist_for_each_entry(daemon, elem,
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 0435886e4a9f..f7fc286a3aa9 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -118,11 +118,15 @@ void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode)
118 */ 118 */
119static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf) 119static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf)
120{ 120{
121 return vfs_statfs(ecryptfs_dentry_to_lower(dentry), buf); 121 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
122
123 if (!lower_dentry->d_sb->s_op->statfs)
124 return -ENOSYS;
125 return lower_dentry->d_sb->s_op->statfs(lower_dentry, buf);
122} 126}
123 127
124/** 128/**
125 * ecryptfs_clear_inode 129 * ecryptfs_evict_inode
126 * @inode - The ecryptfs inode 130 * @inode - The ecryptfs inode
127 * 131 *
128 * Called by iput() when the inode reference count reached zero 132 * Called by iput() when the inode reference count reached zero
@@ -131,8 +135,10 @@ static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf)
131 * on the inode free list. We use this to drop our reference to the 135 * on the inode free list. We use this to drop our reference to the
132 * lower inode. 136 * lower inode.
133 */ 137 */
134static void ecryptfs_clear_inode(struct inode *inode) 138static void ecryptfs_evict_inode(struct inode *inode)
135{ 139{
140 truncate_inode_pages(&inode->i_data, 0);
141 end_writeback(inode);
136 iput(ecryptfs_inode_to_lower(inode)); 142 iput(ecryptfs_inode_to_lower(inode));
137} 143}
138 144
@@ -184,6 +190,6 @@ const struct super_operations ecryptfs_sops = {
184 .drop_inode = generic_delete_inode, 190 .drop_inode = generic_delete_inode,
185 .statfs = ecryptfs_statfs, 191 .statfs = ecryptfs_statfs,
186 .remount_fs = NULL, 192 .remount_fs = NULL,
187 .clear_inode = ecryptfs_clear_inode, 193 .evict_inode = ecryptfs_evict_inode,
188 .show_options = ecryptfs_show_options 194 .show_options = ecryptfs_show_options
189}; 195};
diff --git a/fs/exec.c b/fs/exec.c
index f2de04a01a2a..7761837e4500 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -28,7 +28,6 @@
28#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/stat.h> 29#include <linux/stat.h>
30#include <linux/fcntl.h> 30#include <linux/fcntl.h>
31#include <linux/smp_lock.h>
32#include <linux/swap.h> 31#include <linux/swap.h>
33#include <linux/string.h> 32#include <linux/string.h>
34#include <linux/init.h> 33#include <linux/init.h>
@@ -653,6 +652,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
653 else 652 else
654 stack_base = vma->vm_start - stack_expand; 653 stack_base = vma->vm_start - stack_expand;
655#endif 654#endif
655 current->mm->start_stack = bprm->p;
656 ret = expand_stack(vma, stack_base); 656 ret = expand_stack(vma, stack_base);
657 if (ret) 657 if (ret)
658 ret = -EFAULT; 658 ret = -EFAULT;
@@ -1891,13 +1891,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
1891 */ 1891 */
1892 clear_thread_flag(TIF_SIGPENDING); 1892 clear_thread_flag(TIF_SIGPENDING);
1893 1893
1894 /*
1895 * lock_kernel() because format_corename() is controlled by sysctl, which
1896 * uses lock_kernel()
1897 */
1898 lock_kernel();
1899 ispipe = format_corename(corename, signr); 1894 ispipe = format_corename(corename, signr);
1900 unlock_kernel();
1901 1895
1902 if (ispipe) { 1896 if (ispipe) {
1903 int dump_count; 1897 int dump_count;
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 22721b2fd890..2dc925fa1010 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -256,7 +256,6 @@ static inline int exofs_oi_read(struct exofs_i_info *oi,
256} 256}
257 257
258/* inode.c */ 258/* inode.c */
259void exofs_truncate(struct inode *inode);
260int exofs_setattr(struct dentry *, struct iattr *); 259int exofs_setattr(struct dentry *, struct iattr *);
261int exofs_write_begin(struct file *file, struct address_space *mapping, 260int exofs_write_begin(struct file *file, struct address_space *mapping,
262 loff_t pos, unsigned len, unsigned flags, 261 loff_t pos, unsigned len, unsigned flags,
@@ -264,7 +263,7 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
264extern struct inode *exofs_iget(struct super_block *, unsigned long); 263extern struct inode *exofs_iget(struct super_block *, unsigned long);
265struct inode *exofs_new_inode(struct inode *, int); 264struct inode *exofs_new_inode(struct inode *, int);
266extern int exofs_write_inode(struct inode *, struct writeback_control *wbc); 265extern int exofs_write_inode(struct inode *, struct writeback_control *wbc);
267extern void exofs_delete_inode(struct inode *); 266extern void exofs_evict_inode(struct inode *);
268 267
269/* dir.c: */ 268/* dir.c: */
270int exofs_add_link(struct dentry *, struct inode *); 269int exofs_add_link(struct dentry *, struct inode *);
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index fef6899be397..f9bfe2b501d5 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -86,6 +86,5 @@ const struct file_operations exofs_file_operations = {
86}; 86};
87 87
88const struct inode_operations exofs_file_inode_operations = { 88const struct inode_operations exofs_file_inode_operations = {
89 .truncate = exofs_truncate,
90 .setattr = exofs_setattr, 89 .setattr = exofs_setattr,
91}; 90};
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 4bb6ef822e46..088cb476b68a 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -697,6 +697,13 @@ static int exofs_writepage(struct page *page, struct writeback_control *wbc)
697 return write_exec(&pcol); 697 return write_exec(&pcol);
698} 698}
699 699
700/* i_mutex held using inode->i_size directly */
701static void _write_failed(struct inode *inode, loff_t to)
702{
703 if (to > inode->i_size)
704 truncate_pagecache(inode, to, inode->i_size);
705}
706
700int exofs_write_begin(struct file *file, struct address_space *mapping, 707int exofs_write_begin(struct file *file, struct address_space *mapping,
701 loff_t pos, unsigned len, unsigned flags, 708 loff_t pos, unsigned len, unsigned flags,
702 struct page **pagep, void **fsdata) 709 struct page **pagep, void **fsdata)
@@ -710,7 +717,7 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
710 fsdata); 717 fsdata);
711 if (ret) { 718 if (ret) {
712 EXOFS_DBGMSG("simple_write_begin faild\n"); 719 EXOFS_DBGMSG("simple_write_begin faild\n");
713 return ret; 720 goto out;
714 } 721 }
715 722
716 page = *pagep; 723 page = *pagep;
@@ -725,6 +732,9 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
725 EXOFS_DBGMSG("__readpage_filler faild\n"); 732 EXOFS_DBGMSG("__readpage_filler faild\n");
726 } 733 }
727 } 734 }
735out:
736 if (unlikely(ret))
737 _write_failed(mapping->host, pos + len);
728 738
729 return ret; 739 return ret;
730} 740}
@@ -750,6 +760,10 @@ static int exofs_write_end(struct file *file, struct address_space *mapping,
750 int ret; 760 int ret;
751 761
752 ret = simple_write_end(file, mapping,pos, len, copied, page, fsdata); 762 ret = simple_write_end(file, mapping,pos, len, copied, page, fsdata);
763 if (unlikely(ret))
764 _write_failed(inode, pos + len);
765
766 /* TODO: once simple_write_end marks inode dirty remove */
753 if (i_size != inode->i_size) 767 if (i_size != inode->i_size)
754 mark_inode_dirty(inode); 768 mark_inode_dirty(inode);
755 return ret; 769 return ret;
@@ -808,87 +822,55 @@ static inline int exofs_inode_is_fast_symlink(struct inode *inode)
808 return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0); 822 return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0);
809} 823}
810 824
811/*
812 * get_block_t - Fill in a buffer_head
813 * An OSD takes care of block allocation so we just fake an allocation by
814 * putting in the inode's sector_t in the buffer_head.
815 * TODO: What about the case of create==0 and @iblock does not exist in the
816 * object?
817 */
818static int exofs_get_block(struct inode *inode, sector_t iblock,
819 struct buffer_head *bh_result, int create)
820{
821 map_bh(bh_result, inode->i_sb, iblock);
822 return 0;
823}
824
825const struct osd_attr g_attr_logical_length = ATTR_DEF( 825const struct osd_attr g_attr_logical_length = ATTR_DEF(
826 OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); 826 OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
827 827
828static int _do_truncate(struct inode *inode) 828static int _do_truncate(struct inode *inode, loff_t newsize)
829{ 829{
830 struct exofs_i_info *oi = exofs_i(inode); 830 struct exofs_i_info *oi = exofs_i(inode);
831 loff_t isize = i_size_read(inode);
832 int ret; 831 int ret;
833 832
834 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 833 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
835 834
836 nobh_truncate_page(inode->i_mapping, isize, exofs_get_block); 835 ret = exofs_oi_truncate(oi, (u64)newsize);
836 if (likely(!ret))
837 truncate_setsize(inode, newsize);
837 838
838 ret = exofs_oi_truncate(oi, (u64)isize); 839 EXOFS_DBGMSG("(0x%lx) size=0x%llx ret=>%d\n",
839 EXOFS_DBGMSG("(0x%lx) size=0x%llx\n", inode->i_ino, isize); 840 inode->i_ino, newsize, ret);
840 return ret; 841 return ret;
841} 842}
842 843
843/* 844/*
844 * Truncate a file to the specified size - all we have to do is set the size 845 * Set inode attributes - update size attribute on OSD if needed,
845 * attribute. We make sure the object exists first. 846 * otherwise just call generic functions.
846 */
847void exofs_truncate(struct inode *inode)
848{
849 struct exofs_i_info *oi = exofs_i(inode);
850 int ret;
851
852 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
853 || S_ISLNK(inode->i_mode)))
854 return;
855 if (exofs_inode_is_fast_symlink(inode))
856 return;
857 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
858 return;
859
860 /* if we are about to truncate an object, and it hasn't been
861 * created yet, wait
862 */
863 if (unlikely(wait_obj_created(oi)))
864 goto fail;
865
866 ret = _do_truncate(inode);
867 if (ret)
868 goto fail;
869
870out:
871 mark_inode_dirty(inode);
872 return;
873fail:
874 make_bad_inode(inode);
875 goto out;
876}
877
878/*
879 * Set inode attributes - just call generic functions.
880 */ 847 */
881int exofs_setattr(struct dentry *dentry, struct iattr *iattr) 848int exofs_setattr(struct dentry *dentry, struct iattr *iattr)
882{ 849{
883 struct inode *inode = dentry->d_inode; 850 struct inode *inode = dentry->d_inode;
884 int error; 851 int error;
885 852
853 /* if we are about to modify an object, and it hasn't been
854 * created yet, wait
855 */
856 error = wait_obj_created(exofs_i(inode));
857 if (unlikely(error))
858 return error;
859
886 error = inode_change_ok(inode, iattr); 860 error = inode_change_ok(inode, iattr);
887 if (error) 861 if (unlikely(error))
888 return error; 862 return error;
889 863
890 error = inode_setattr(inode, iattr); 864 if ((iattr->ia_valid & ATTR_SIZE) &&
891 return error; 865 iattr->ia_size != i_size_read(inode)) {
866 error = _do_truncate(inode, iattr->ia_size);
867 if (unlikely(error))
868 return error;
869 }
870
871 setattr_copy(inode, iattr);
872 mark_inode_dirty(inode);
873 return 0;
892} 874}
893 875
894static const struct osd_attr g_attr_inode_file_layout = ATTR_DEF( 876static const struct osd_attr g_attr_inode_file_layout = ATTR_DEF(
@@ -1325,7 +1307,7 @@ static void delete_done(struct exofs_io_state *ios, void *p)
1325 * from the OSD here. We make sure the object was created before we try and 1307 * from the OSD here. We make sure the object was created before we try and
1326 * delete it. 1308 * delete it.
1327 */ 1309 */
1328void exofs_delete_inode(struct inode *inode) 1310void exofs_evict_inode(struct inode *inode)
1329{ 1311{
1330 struct exofs_i_info *oi = exofs_i(inode); 1312 struct exofs_i_info *oi = exofs_i(inode);
1331 struct super_block *sb = inode->i_sb; 1313 struct super_block *sb = inode->i_sb;
@@ -1335,30 +1317,27 @@ void exofs_delete_inode(struct inode *inode)
1335 1317
1336 truncate_inode_pages(&inode->i_data, 0); 1318 truncate_inode_pages(&inode->i_data, 0);
1337 1319
1338 if (is_bad_inode(inode)) 1320 /* TODO: should do better here */
1321 if (inode->i_nlink || is_bad_inode(inode))
1339 goto no_delete; 1322 goto no_delete;
1340 1323
1341 mark_inode_dirty(inode);
1342 exofs_update_inode(inode, inode_needs_sync(inode));
1343
1344 inode->i_size = 0; 1324 inode->i_size = 0;
1345 if (inode->i_blocks) 1325 end_writeback(inode);
1346 exofs_truncate(inode);
1347 1326
1348 clear_inode(inode); 1327 /* if we are deleting an obj that hasn't been created yet, wait */
1328 if (!obj_created(oi)) {
1329 BUG_ON(!obj_2bcreated(oi));
1330 wait_event(oi->i_wq, obj_created(oi));
1331 /* ignore the error attempt a remove anyway */
1332 }
1349 1333
1334 /* Now Remove the OSD objects */
1350 ret = exofs_get_io_state(&sbi->layout, &ios); 1335 ret = exofs_get_io_state(&sbi->layout, &ios);
1351 if (unlikely(ret)) { 1336 if (unlikely(ret)) {
1352 EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__); 1337 EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__);
1353 return; 1338 return;
1354 } 1339 }
1355 1340
1356 /* if we are deleting an obj that hasn't been created yet, wait */
1357 if (!obj_created(oi)) {
1358 BUG_ON(!obj_2bcreated(oi));
1359 wait_event(oi->i_wq, obj_created(oi));
1360 }
1361
1362 ios->obj.id = exofs_oi_objno(oi); 1341 ios->obj.id = exofs_oi_objno(oi);
1363 ios->done = delete_done; 1342 ios->done = delete_done;
1364 ios->private = sbi; 1343 ios->private = sbi;
@@ -1374,5 +1353,5 @@ void exofs_delete_inode(struct inode *inode)
1374 return; 1353 return;
1375 1354
1376no_delete: 1355no_delete:
1377 clear_inode(inode); 1356 end_writeback(inode);
1378} 1357}
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 03149b9a5178..32cfd61def5f 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -743,7 +743,7 @@ static const struct super_operations exofs_sops = {
743 .alloc_inode = exofs_alloc_inode, 743 .alloc_inode = exofs_alloc_inode,
744 .destroy_inode = exofs_destroy_inode, 744 .destroy_inode = exofs_destroy_inode,
745 .write_inode = exofs_write_inode, 745 .write_inode = exofs_write_inode,
746 .delete_inode = exofs_delete_inode, 746 .evict_inode = exofs_evict_inode,
747 .put_super = exofs_put_super, 747 .put_super = exofs_put_super,
748 .write_super = exofs_write_super, 748 .write_super = exofs_write_super,
749 .sync_fs = exofs_sync_fs, 749 .sync_fs = exofs_sync_fs,
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index e8766a396776..c6c684b44ea1 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -571,7 +571,7 @@ do_more:
571error_return: 571error_return:
572 brelse(bitmap_bh); 572 brelse(bitmap_bh);
573 release_blocks(sb, freed); 573 release_blocks(sb, freed);
574 dquot_free_block(inode, freed); 574 dquot_free_block_nodirty(inode, freed);
575} 575}
576 576
577/** 577/**
@@ -1418,7 +1418,8 @@ allocated:
1418 1418
1419 *errp = 0; 1419 *errp = 0;
1420 brelse(bitmap_bh); 1420 brelse(bitmap_bh);
1421 dquot_free_block(inode, *count-num); 1421 dquot_free_block_nodirty(inode, *count-num);
1422 mark_inode_dirty(inode);
1422 *count = num; 1423 *count = num;
1423 return ret_block; 1424 return ret_block;
1424 1425
@@ -1428,8 +1429,10 @@ out:
1428 /* 1429 /*
1429 * Undo the block allocation 1430 * Undo the block allocation
1430 */ 1431 */
1431 if (!performed_allocation) 1432 if (!performed_allocation) {
1432 dquot_free_block(inode, *count); 1433 dquot_free_block_nodirty(inode, *count);
1434 mark_inode_dirty(inode);
1435 }
1433 brelse(bitmap_bh); 1436 brelse(bitmap_bh);
1434 return 0; 1437 return 0;
1435} 1438}
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 7516957273ed..764109886ec0 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -448,6 +448,11 @@ ino_t ext2_inode_by_name(struct inode *dir, struct qstr *child)
448 return res; 448 return res;
449} 449}
450 450
451static int ext2_prepare_chunk(struct page *page, loff_t pos, unsigned len)
452{
453 return __block_write_begin(page, pos, len, ext2_get_block);
454}
455
451/* Releases the page */ 456/* Releases the page */
452void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, 457void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
453 struct page *page, struct inode *inode, int update_times) 458 struct page *page, struct inode *inode, int update_times)
@@ -458,8 +463,7 @@ void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
458 int err; 463 int err;
459 464
460 lock_page(page); 465 lock_page(page);
461 err = __ext2_write_begin(NULL, page->mapping, pos, len, 466 err = ext2_prepare_chunk(page, pos, len);
462 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
463 BUG_ON(err); 467 BUG_ON(err);
464 de->inode = cpu_to_le32(inode->i_ino); 468 de->inode = cpu_to_le32(inode->i_ino);
465 ext2_set_de_type(de, inode); 469 ext2_set_de_type(de, inode);
@@ -542,8 +546,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
542got_it: 546got_it:
543 pos = page_offset(page) + 547 pos = page_offset(page) +
544 (char*)de - (char*)page_address(page); 548 (char*)de - (char*)page_address(page);
545 err = __ext2_write_begin(NULL, page->mapping, pos, rec_len, 0, 549 err = ext2_prepare_chunk(page, pos, rec_len);
546 &page, NULL);
547 if (err) 550 if (err)
548 goto out_unlock; 551 goto out_unlock;
549 if (de->inode) { 552 if (de->inode) {
@@ -576,8 +579,7 @@ out_unlock:
576 */ 579 */
577int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) 580int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
578{ 581{
579 struct address_space *mapping = page->mapping; 582 struct inode *inode = page->mapping->host;
580 struct inode *inode = mapping->host;
581 char *kaddr = page_address(page); 583 char *kaddr = page_address(page);
582 unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1); 584 unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1);
583 unsigned to = ((char *)dir - kaddr) + 585 unsigned to = ((char *)dir - kaddr) +
@@ -601,8 +603,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
601 from = (char*)pde - (char*)page_address(page); 603 from = (char*)pde - (char*)page_address(page);
602 pos = page_offset(page) + from; 604 pos = page_offset(page) + from;
603 lock_page(page); 605 lock_page(page);
604 err = __ext2_write_begin(NULL, page->mapping, pos, to - from, 0, 606 err = ext2_prepare_chunk(page, pos, to - from);
605 &page, NULL);
606 BUG_ON(err); 607 BUG_ON(err);
607 if (pde) 608 if (pde)
608 pde->rec_len = ext2_rec_len_to_disk(to - from); 609 pde->rec_len = ext2_rec_len_to_disk(to - from);
@@ -621,8 +622,7 @@ out:
621 */ 622 */
622int ext2_make_empty(struct inode *inode, struct inode *parent) 623int ext2_make_empty(struct inode *inode, struct inode *parent)
623{ 624{
624 struct address_space *mapping = inode->i_mapping; 625 struct page *page = grab_cache_page(inode->i_mapping, 0);
625 struct page *page = grab_cache_page(mapping, 0);
626 unsigned chunk_size = ext2_chunk_size(inode); 626 unsigned chunk_size = ext2_chunk_size(inode);
627 struct ext2_dir_entry_2 * de; 627 struct ext2_dir_entry_2 * de;
628 int err; 628 int err;
@@ -631,8 +631,7 @@ int ext2_make_empty(struct inode *inode, struct inode *parent)
631 if (!page) 631 if (!page)
632 return -ENOMEM; 632 return -ENOMEM;
633 633
634 err = __ext2_write_begin(NULL, page->mapping, 0, chunk_size, 0, 634 err = ext2_prepare_chunk(page, 0, chunk_size);
635 &page, NULL);
636 if (err) { 635 if (err) {
637 unlock_page(page); 636 unlock_page(page);
638 goto fail; 637 goto fail;
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 52b34f1d2738..416daa62242c 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -119,7 +119,7 @@ extern unsigned long ext2_count_free (struct buffer_head *, unsigned);
119/* inode.c */ 119/* inode.c */
120extern struct inode *ext2_iget (struct super_block *, unsigned long); 120extern struct inode *ext2_iget (struct super_block *, unsigned long);
121extern int ext2_write_inode (struct inode *, struct writeback_control *); 121extern int ext2_write_inode (struct inode *, struct writeback_control *);
122extern void ext2_delete_inode (struct inode *); 122extern void ext2_evict_inode(struct inode *);
123extern int ext2_sync_inode (struct inode *); 123extern int ext2_sync_inode (struct inode *);
124extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); 124extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int);
125extern int ext2_setattr (struct dentry *, struct iattr *); 125extern int ext2_setattr (struct dentry *, struct iattr *);
@@ -127,9 +127,6 @@ extern void ext2_set_inode_flags(struct inode *inode);
127extern void ext2_get_inode_flags(struct ext2_inode_info *); 127extern void ext2_get_inode_flags(struct ext2_inode_info *);
128extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 128extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
129 u64 start, u64 len); 129 u64 start, u64 len);
130int __ext2_write_begin(struct file *file, struct address_space *mapping,
131 loff_t pos, unsigned len, unsigned flags,
132 struct page **pagep, void **fsdata);
133 130
134/* ioctl.c */ 131/* ioctl.c */
135extern long ext2_ioctl(struct file *, unsigned int, unsigned long); 132extern long ext2_ioctl(struct file *, unsigned int, unsigned long);
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 938dbc739d00..ad70479aabff 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -118,19 +118,14 @@ void ext2_free_inode (struct inode * inode)
118 * Note: we must free any quota before locking the superblock, 118 * Note: we must free any quota before locking the superblock,
119 * as writing the quota to disk may need the lock as well. 119 * as writing the quota to disk may need the lock as well.
120 */ 120 */
121 if (!is_bad_inode(inode)) { 121 /* Quota is already initialized in iput() */
122 /* Quota is already initialized in iput() */ 122 ext2_xattr_delete_inode(inode);
123 ext2_xattr_delete_inode(inode); 123 dquot_free_inode(inode);
124 dquot_free_inode(inode); 124 dquot_drop(inode);
125 dquot_drop(inode);
126 }
127 125
128 es = EXT2_SB(sb)->s_es; 126 es = EXT2_SB(sb)->s_es;
129 is_directory = S_ISDIR(inode->i_mode); 127 is_directory = S_ISDIR(inode->i_mode);
130 128
131 /* Do this BEFORE marking the inode not in use or returning an error */
132 clear_inode (inode);
133
134 if (ino < EXT2_FIRST_INO(sb) || 129 if (ino < EXT2_FIRST_INO(sb) ||
135 ino > le32_to_cpu(es->s_inodes_count)) { 130 ino > le32_to_cpu(es->s_inodes_count)) {
136 ext2_error (sb, "ext2_free_inode", 131 ext2_error (sb, "ext2_free_inode",
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 3675088cb88c..940c96168868 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -69,26 +69,42 @@ static void ext2_write_failed(struct address_space *mapping, loff_t to)
69/* 69/*
70 * Called at the last iput() if i_nlink is zero. 70 * Called at the last iput() if i_nlink is zero.
71 */ 71 */
72void ext2_delete_inode (struct inode * inode) 72void ext2_evict_inode(struct inode * inode)
73{ 73{
74 if (!is_bad_inode(inode)) 74 struct ext2_block_alloc_info *rsv;
75 int want_delete = 0;
76
77 if (!inode->i_nlink && !is_bad_inode(inode)) {
78 want_delete = 1;
75 dquot_initialize(inode); 79 dquot_initialize(inode);
80 } else {
81 dquot_drop(inode);
82 }
83
76 truncate_inode_pages(&inode->i_data, 0); 84 truncate_inode_pages(&inode->i_data, 0);
77 85
78 if (is_bad_inode(inode)) 86 if (want_delete) {
79 goto no_delete; 87 /* set dtime */
80 EXT2_I(inode)->i_dtime = get_seconds(); 88 EXT2_I(inode)->i_dtime = get_seconds();
81 mark_inode_dirty(inode); 89 mark_inode_dirty(inode);
82 __ext2_write_inode(inode, inode_needs_sync(inode)); 90 __ext2_write_inode(inode, inode_needs_sync(inode));
91 /* truncate to 0 */
92 inode->i_size = 0;
93 if (inode->i_blocks)
94 ext2_truncate_blocks(inode, 0);
95 }
83 96
84 inode->i_size = 0; 97 invalidate_inode_buffers(inode);
85 if (inode->i_blocks) 98 end_writeback(inode);
86 ext2_truncate_blocks(inode, 0);
87 ext2_free_inode (inode);
88 99
89 return; 100 ext2_discard_reservation(inode);
90no_delete: 101 rsv = EXT2_I(inode)->i_block_alloc_info;
91 clear_inode(inode); /* We must guarantee clearing of inode... */ 102 EXT2_I(inode)->i_block_alloc_info = NULL;
103 if (unlikely(rsv))
104 kfree(rsv);
105
106 if (want_delete)
107 ext2_free_inode(inode);
92} 108}
93 109
94typedef struct { 110typedef struct {
@@ -423,6 +439,8 @@ static int ext2_alloc_blocks(struct inode *inode,
423failed_out: 439failed_out:
424 for (i = 0; i <index; i++) 440 for (i = 0; i <index; i++)
425 ext2_free_blocks(inode, new_blocks[i], 1); 441 ext2_free_blocks(inode, new_blocks[i], 1);
442 if (index)
443 mark_inode_dirty(inode);
426 return ret; 444 return ret;
427} 445}
428 446
@@ -765,14 +783,6 @@ ext2_readpages(struct file *file, struct address_space *mapping,
765 return mpage_readpages(mapping, pages, nr_pages, ext2_get_block); 783 return mpage_readpages(mapping, pages, nr_pages, ext2_get_block);
766} 784}
767 785
768int __ext2_write_begin(struct file *file, struct address_space *mapping,
769 loff_t pos, unsigned len, unsigned flags,
770 struct page **pagep, void **fsdata)
771{
772 return block_write_begin_newtrunc(file, mapping, pos, len, flags,
773 pagep, fsdata, ext2_get_block);
774}
775
776static int 786static int
777ext2_write_begin(struct file *file, struct address_space *mapping, 787ext2_write_begin(struct file *file, struct address_space *mapping,
778 loff_t pos, unsigned len, unsigned flags, 788 loff_t pos, unsigned len, unsigned flags,
@@ -780,8 +790,8 @@ ext2_write_begin(struct file *file, struct address_space *mapping,
780{ 790{
781 int ret; 791 int ret;
782 792
783 *pagep = NULL; 793 ret = block_write_begin(mapping, pos, len, flags, pagep,
784 ret = __ext2_write_begin(file, mapping, pos, len, flags, pagep, fsdata); 794 ext2_get_block);
785 if (ret < 0) 795 if (ret < 0)
786 ext2_write_failed(mapping, pos + len); 796 ext2_write_failed(mapping, pos + len);
787 return ret; 797 return ret;
@@ -806,13 +816,8 @@ ext2_nobh_write_begin(struct file *file, struct address_space *mapping,
806{ 816{
807 int ret; 817 int ret;
808 818
809 /* 819 ret = nobh_write_begin(mapping, pos, len, flags, pagep, fsdata,
810 * Dir-in-pagecache still uses ext2_write_begin. Would have to rework 820 ext2_get_block);
811 * directory handling code to pass around offsets rather than struct
812 * pages in order to make this work easily.
813 */
814 ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags, pagep,
815 fsdata, ext2_get_block);
816 if (ret < 0) 821 if (ret < 0)
817 ext2_write_failed(mapping, pos + len); 822 ext2_write_failed(mapping, pos + len);
818 return ret; 823 return ret;
@@ -838,7 +843,7 @@ ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
838 struct inode *inode = mapping->host; 843 struct inode *inode = mapping->host;
839 ssize_t ret; 844 ssize_t ret;
840 845
841 ret = blockdev_direct_IO_newtrunc(rw, iocb, inode, inode->i_sb->s_bdev, 846 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
842 iov, offset, nr_segs, ext2_get_block, NULL); 847 iov, offset, nr_segs, ext2_get_block, NULL);
843 if (ret < 0 && (rw & WRITE)) 848 if (ret < 0 && (rw & WRITE))
844 ext2_write_failed(mapping, offset + iov_length(iov, nr_segs)); 849 ext2_write_failed(mapping, offset + iov_length(iov, nr_segs));
@@ -1006,8 +1011,8 @@ static inline void ext2_free_data(struct inode *inode, __le32 *p, __le32 *q)
1006 else if (block_to_free == nr - count) 1011 else if (block_to_free == nr - count)
1007 count++; 1012 count++;
1008 else { 1013 else {
1009 mark_inode_dirty(inode);
1010 ext2_free_blocks (inode, block_to_free, count); 1014 ext2_free_blocks (inode, block_to_free, count);
1015 mark_inode_dirty(inode);
1011 free_this: 1016 free_this:
1012 block_to_free = nr; 1017 block_to_free = nr;
1013 count = 1; 1018 count = 1;
@@ -1015,8 +1020,8 @@ static inline void ext2_free_data(struct inode *inode, __le32 *p, __le32 *q)
1015 } 1020 }
1016 } 1021 }
1017 if (count > 0) { 1022 if (count > 0) {
1018 mark_inode_dirty(inode);
1019 ext2_free_blocks (inode, block_to_free, count); 1023 ext2_free_blocks (inode, block_to_free, count);
1024 mark_inode_dirty(inode);
1020 } 1025 }
1021} 1026}
1022 1027
@@ -1169,15 +1174,10 @@ static void ext2_truncate_blocks(struct inode *inode, loff_t offset)
1169 __ext2_truncate_blocks(inode, offset); 1174 __ext2_truncate_blocks(inode, offset);
1170} 1175}
1171 1176
1172int ext2_setsize(struct inode *inode, loff_t newsize) 1177static int ext2_setsize(struct inode *inode, loff_t newsize)
1173{ 1178{
1174 loff_t oldsize;
1175 int error; 1179 int error;
1176 1180
1177 error = inode_newsize_ok(inode, newsize);
1178 if (error)
1179 return error;
1180
1181 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 1181 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
1182 S_ISLNK(inode->i_mode))) 1182 S_ISLNK(inode->i_mode)))
1183 return -EINVAL; 1183 return -EINVAL;
@@ -1197,10 +1197,7 @@ int ext2_setsize(struct inode *inode, loff_t newsize)
1197 if (error) 1197 if (error)
1198 return error; 1198 return error;
1199 1199
1200 oldsize = inode->i_size; 1200 truncate_setsize(inode, newsize);
1201 i_size_write(inode, newsize);
1202 truncate_pagecache(inode, oldsize, newsize);
1203
1204 __ext2_truncate_blocks(inode, newsize); 1201 __ext2_truncate_blocks(inode, newsize);
1205 1202
1206 inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; 1203 inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
@@ -1557,7 +1554,7 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
1557 if (error) 1554 if (error)
1558 return error; 1555 return error;
1559 } 1556 }
1560 generic_setattr(inode, iattr); 1557 setattr_copy(inode, iattr);
1561 if (iattr->ia_valid & ATTR_MODE) 1558 if (iattr->ia_valid & ATTR_MODE)
1562 error = ext2_acl_chmod(inode); 1559 error = ext2_acl_chmod(inode);
1563 mark_inode_dirty(inode); 1560 mark_inode_dirty(inode);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 7ff43f4a59cd..1ec602673ea8 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -195,17 +195,6 @@ static void destroy_inodecache(void)
195 kmem_cache_destroy(ext2_inode_cachep); 195 kmem_cache_destroy(ext2_inode_cachep);
196} 196}
197 197
198static void ext2_clear_inode(struct inode *inode)
199{
200 struct ext2_block_alloc_info *rsv = EXT2_I(inode)->i_block_alloc_info;
201
202 dquot_drop(inode);
203 ext2_discard_reservation(inode);
204 EXT2_I(inode)->i_block_alloc_info = NULL;
205 if (unlikely(rsv))
206 kfree(rsv);
207}
208
209static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs) 198static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs)
210{ 199{
211 struct super_block *sb = vfs->mnt_sb; 200 struct super_block *sb = vfs->mnt_sb;
@@ -299,13 +288,12 @@ static const struct super_operations ext2_sops = {
299 .alloc_inode = ext2_alloc_inode, 288 .alloc_inode = ext2_alloc_inode,
300 .destroy_inode = ext2_destroy_inode, 289 .destroy_inode = ext2_destroy_inode,
301 .write_inode = ext2_write_inode, 290 .write_inode = ext2_write_inode,
302 .delete_inode = ext2_delete_inode, 291 .evict_inode = ext2_evict_inode,
303 .put_super = ext2_put_super, 292 .put_super = ext2_put_super,
304 .write_super = ext2_write_super, 293 .write_super = ext2_write_super,
305 .sync_fs = ext2_sync_fs, 294 .sync_fs = ext2_sync_fs,
306 .statfs = ext2_statfs, 295 .statfs = ext2_statfs,
307 .remount_fs = ext2_remount, 296 .remount_fs = ext2_remount,
308 .clear_inode = ext2_clear_inode,
309 .show_options = ext2_show_options, 297 .show_options = ext2_show_options,
310#ifdef CONFIG_QUOTA 298#ifdef CONFIG_QUOTA
311 .quota_read = ext2_quota_read, 299 .quota_read = ext2_quota_read,
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 7c3915780b19..8c29ae15129e 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -674,6 +674,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
674 new_bh = sb_getblk(sb, block); 674 new_bh = sb_getblk(sb, block);
675 if (!new_bh) { 675 if (!new_bh) {
676 ext2_free_blocks(inode, block, 1); 676 ext2_free_blocks(inode, block, 1);
677 mark_inode_dirty(inode);
677 error = -EIO; 678 error = -EIO;
678 goto cleanup; 679 goto cleanup;
679 } 680 }
@@ -703,8 +704,10 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
703 * written (only some dirty data were not) so we just proceed 704 * written (only some dirty data were not) so we just proceed
704 * as if nothing happened and cleanup the unused block */ 705 * as if nothing happened and cleanup the unused block */
705 if (error && error != -ENOSPC) { 706 if (error && error != -ENOSPC) {
706 if (new_bh && new_bh != old_bh) 707 if (new_bh && new_bh != old_bh) {
707 dquot_free_block(inode, 1); 708 dquot_free_block_nodirty(inode, 1);
709 mark_inode_dirty(inode);
710 }
708 goto cleanup; 711 goto cleanup;
709 } 712 }
710 } else 713 } else
@@ -727,6 +730,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
727 mb_cache_entry_free(ce); 730 mb_cache_entry_free(ce);
728 ea_bdebug(old_bh, "freeing"); 731 ea_bdebug(old_bh, "freeing");
729 ext2_free_blocks(inode, old_bh->b_blocknr, 1); 732 ext2_free_blocks(inode, old_bh->b_blocknr, 1);
733 mark_inode_dirty(inode);
730 /* We let our caller release old_bh, so we 734 /* We let our caller release old_bh, so we
731 * need to duplicate the buffer before. */ 735 * need to duplicate the buffer before. */
732 get_bh(old_bh); 736 get_bh(old_bh);
@@ -736,7 +740,8 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
736 le32_add_cpu(&HDR(old_bh)->h_refcount, -1); 740 le32_add_cpu(&HDR(old_bh)->h_refcount, -1);
737 if (ce) 741 if (ce)
738 mb_cache_entry_release(ce); 742 mb_cache_entry_release(ce);
739 dquot_free_block(inode, 1); 743 dquot_free_block_nodirty(inode, 1);
744 mark_inode_dirty(inode);
740 mark_buffer_dirty(old_bh); 745 mark_buffer_dirty(old_bh);
741 ea_bdebug(old_bh, "refcount now=%d", 746 ea_bdebug(old_bh, "refcount now=%d",
742 le32_to_cpu(HDR(old_bh)->h_refcount)); 747 le32_to_cpu(HDR(old_bh)->h_refcount));
@@ -799,7 +804,7 @@ ext2_xattr_delete_inode(struct inode *inode)
799 mark_buffer_dirty(bh); 804 mark_buffer_dirty(bh);
800 if (IS_SYNC(inode)) 805 if (IS_SYNC(inode))
801 sync_dirty_buffer(bh); 806 sync_dirty_buffer(bh);
802 dquot_free_block(inode, 1); 807 dquot_free_block_nodirty(inode, 1);
803 } 808 }
804 EXT2_I(inode)->i_file_acl = 0; 809 EXT2_I(inode)->i_file_acl = 0;
805 810
@@ -838,7 +843,7 @@ ext2_xattr_cache_insert(struct buffer_head *bh)
838 ce = mb_cache_entry_alloc(ext2_xattr_cache, GFP_NOFS); 843 ce = mb_cache_entry_alloc(ext2_xattr_cache, GFP_NOFS);
839 if (!ce) 844 if (!ce)
840 return -ENOMEM; 845 return -ENOMEM;
841 error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash); 846 error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
842 if (error) { 847 if (error) {
843 mb_cache_entry_free(ce); 848 mb_cache_entry_free(ce);
844 if (error == -EBUSY) { 849 if (error == -EBUSY) {
@@ -912,8 +917,8 @@ ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
912 return NULL; /* never share */ 917 return NULL; /* never share */
913 ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); 918 ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
914again: 919again:
915 ce = mb_cache_entry_find_first(ext2_xattr_cache, 0, 920 ce = mb_cache_entry_find_first(ext2_xattr_cache, inode->i_sb->s_bdev,
916 inode->i_sb->s_bdev, hash); 921 hash);
917 while (ce) { 922 while (ce) {
918 struct buffer_head *bh; 923 struct buffer_head *bh;
919 924
@@ -945,7 +950,7 @@ again:
945 unlock_buffer(bh); 950 unlock_buffer(bh);
946 brelse(bh); 951 brelse(bh);
947 } 952 }
948 ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); 953 ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
949 } 954 }
950 return NULL; 955 return NULL;
951} 956}
@@ -1021,9 +1026,7 @@ static void ext2_xattr_rehash(struct ext2_xattr_header *header,
1021int __init 1026int __init
1022init_ext2_xattr(void) 1027init_ext2_xattr(void)
1023{ 1028{
1024 ext2_xattr_cache = mb_cache_create("ext2_xattr", NULL, 1029 ext2_xattr_cache = mb_cache_create("ext2_xattr", 6);
1025 sizeof(struct mb_cache_entry) +
1026 sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6);
1027 if (!ext2_xattr_cache) 1030 if (!ext2_xattr_cache)
1028 return -ENOMEM; 1031 return -ENOMEM;
1029 return 0; 1032 return 0;
diff --git a/fs/ext3/Kconfig b/fs/ext3/Kconfig
index 522b15498f45..e8c6ba0e4a3e 100644
--- a/fs/ext3/Kconfig
+++ b/fs/ext3/Kconfig
@@ -31,6 +31,7 @@ config EXT3_FS
31config EXT3_DEFAULTS_TO_ORDERED 31config EXT3_DEFAULTS_TO_ORDERED
32 bool "Default to 'data=ordered' in ext3" 32 bool "Default to 'data=ordered' in ext3"
33 depends on EXT3_FS 33 depends on EXT3_FS
34 default y
34 help 35 help
35 The journal mode options for ext3 have different tradeoffs 36 The journal mode options for ext3 have different tradeoffs
36 between when data is guaranteed to be on disk and 37 between when data is guaranteed to be on disk and
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 498021eb88fb..4ab72db3559e 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -119,20 +119,8 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
119 ino = inode->i_ino; 119 ino = inode->i_ino;
120 ext3_debug ("freeing inode %lu\n", ino); 120 ext3_debug ("freeing inode %lu\n", ino);
121 121
122 /*
123 * Note: we must free any quota before locking the superblock,
124 * as writing the quota to disk may need the lock as well.
125 */
126 dquot_initialize(inode);
127 ext3_xattr_delete_inode(handle, inode);
128 dquot_free_inode(inode);
129 dquot_drop(inode);
130
131 is_directory = S_ISDIR(inode->i_mode); 122 is_directory = S_ISDIR(inode->i_mode);
132 123
133 /* Do this BEFORE marking the inode not in use or returning an error */
134 clear_inode (inode);
135
136 es = EXT3_SB(sb)->s_es; 124 es = EXT3_SB(sb)->s_es;
137 if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { 125 if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
138 ext3_error (sb, "ext3_free_inode", 126 ext3_error (sb, "ext3_free_inode",
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 735f0190ec2a..5e0faf4cda79 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -190,18 +190,28 @@ static int truncate_restart_transaction(handle_t *handle, struct inode *inode)
190} 190}
191 191
192/* 192/*
193 * Called at the last iput() if i_nlink is zero. 193 * Called at inode eviction from icache
194 */ 194 */
195void ext3_delete_inode (struct inode * inode) 195void ext3_evict_inode (struct inode *inode)
196{ 196{
197 struct ext3_block_alloc_info *rsv;
197 handle_t *handle; 198 handle_t *handle;
199 int want_delete = 0;
198 200
199 if (!is_bad_inode(inode)) 201 if (!inode->i_nlink && !is_bad_inode(inode)) {
200 dquot_initialize(inode); 202 dquot_initialize(inode);
203 want_delete = 1;
204 }
201 205
202 truncate_inode_pages(&inode->i_data, 0); 206 truncate_inode_pages(&inode->i_data, 0);
203 207
204 if (is_bad_inode(inode)) 208 ext3_discard_reservation(inode);
209 rsv = EXT3_I(inode)->i_block_alloc_info;
210 EXT3_I(inode)->i_block_alloc_info = NULL;
211 if (unlikely(rsv))
212 kfree(rsv);
213
214 if (!want_delete)
205 goto no_delete; 215 goto no_delete;
206 216
207 handle = start_transaction(inode); 217 handle = start_transaction(inode);
@@ -238,15 +248,22 @@ void ext3_delete_inode (struct inode * inode)
238 * having errors), but we can't free the inode if the mark_dirty 248 * having errors), but we can't free the inode if the mark_dirty
239 * fails. 249 * fails.
240 */ 250 */
241 if (ext3_mark_inode_dirty(handle, inode)) 251 if (ext3_mark_inode_dirty(handle, inode)) {
242 /* If that failed, just do the required in-core inode clear. */ 252 /* If that failed, just dquot_drop() and be done with that */
243 clear_inode(inode); 253 dquot_drop(inode);
244 else 254 end_writeback(inode);
255 } else {
256 ext3_xattr_delete_inode(handle, inode);
257 dquot_free_inode(inode);
258 dquot_drop(inode);
259 end_writeback(inode);
245 ext3_free_inode(handle, inode); 260 ext3_free_inode(handle, inode);
261 }
246 ext3_journal_stop(handle); 262 ext3_journal_stop(handle);
247 return; 263 return;
248no_delete: 264no_delete:
249 clear_inode(inode); /* We must guarantee clearing of inode... */ 265 end_writeback(inode);
266 dquot_drop(inode);
250} 267}
251 268
252typedef struct { 269typedef struct {
@@ -1149,9 +1166,25 @@ static int walk_page_buffers( handle_t *handle,
1149static int do_journal_get_write_access(handle_t *handle, 1166static int do_journal_get_write_access(handle_t *handle,
1150 struct buffer_head *bh) 1167 struct buffer_head *bh)
1151{ 1168{
1169 int dirty = buffer_dirty(bh);
1170 int ret;
1171
1152 if (!buffer_mapped(bh) || buffer_freed(bh)) 1172 if (!buffer_mapped(bh) || buffer_freed(bh))
1153 return 0; 1173 return 0;
1154 return ext3_journal_get_write_access(handle, bh); 1174 /*
1175 * __block_prepare_write() could have dirtied some buffers. Clean
1176 * the dirty bit as jbd2_journal_get_write_access() could complain
1177 * otherwise about fs integrity issues. Setting of the dirty bit
1178 * by __block_prepare_write() isn't a real problem here as we clear
1179 * the bit before releasing a page lock and thus writeback cannot
1180 * ever write the buffer.
1181 */
1182 if (dirty)
1183 clear_buffer_dirty(bh);
1184 ret = ext3_journal_get_write_access(handle, bh);
1185 if (!ret && dirty)
1186 ret = ext3_journal_dirty_metadata(handle, bh);
1187 return ret;
1155} 1188}
1156 1189
1157/* 1190/*
@@ -1196,8 +1229,7 @@ retry:
1196 ret = PTR_ERR(handle); 1229 ret = PTR_ERR(handle);
1197 goto out; 1230 goto out;
1198 } 1231 }
1199 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 1232 ret = __block_write_begin(page, pos, len, ext3_get_block);
1200 ext3_get_block);
1201 if (ret) 1233 if (ret)
1202 goto write_begin_failed; 1234 goto write_begin_failed;
1203 1235
@@ -1625,10 +1657,7 @@ static int ext3_writeback_writepage(struct page *page,
1625 goto out_fail; 1657 goto out_fail;
1626 } 1658 }
1627 1659
1628 if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode)) 1660 ret = block_write_full_page(page, ext3_get_block, wbc);
1629 ret = nobh_writepage(page, ext3_get_block, wbc);
1630 else
1631 ret = block_write_full_page(page, ext3_get_block, wbc);
1632 1661
1633 err = ext3_journal_stop(handle); 1662 err = ext3_journal_stop(handle);
1634 if (!ret) 1663 if (!ret)
@@ -1785,6 +1814,17 @@ retry:
1785 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 1814 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
1786 offset, nr_segs, 1815 offset, nr_segs,
1787 ext3_get_block, NULL); 1816 ext3_get_block, NULL);
1817 /*
1818 * In case of error extending write may have instantiated a few
1819 * blocks outside i_size. Trim these off again.
1820 */
1821 if (unlikely((rw & WRITE) && ret < 0)) {
1822 loff_t isize = i_size_read(inode);
1823 loff_t end = offset + iov_length(iov, nr_segs);
1824
1825 if (end > isize)
1826 vmtruncate(inode, isize);
1827 }
1788 if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) 1828 if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
1789 goto retry; 1829 goto retry;
1790 1830
@@ -1922,17 +1962,6 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
1922 length = blocksize - (offset & (blocksize - 1)); 1962 length = blocksize - (offset & (blocksize - 1));
1923 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); 1963 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
1924 1964
1925 /*
1926 * For "nobh" option, we can only work if we don't need to
1927 * read-in the page - otherwise we create buffers to do the IO.
1928 */
1929 if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) &&
1930 ext3_should_writeback_data(inode) && PageUptodate(page)) {
1931 zero_user(page, offset, length);
1932 set_page_dirty(page);
1933 goto unlock;
1934 }
1935
1936 if (!page_has_buffers(page)) 1965 if (!page_has_buffers(page))
1937 create_empty_buffers(page, blocksize, 0); 1966 create_empty_buffers(page, blocksize, 0);
1938 1967
@@ -2284,27 +2313,6 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
2284 depth); 2313 depth);
2285 2314
2286 /* 2315 /*
2287 * We've probably journalled the indirect block several
2288 * times during the truncate. But it's no longer
2289 * needed and we now drop it from the transaction via
2290 * journal_revoke().
2291 *
2292 * That's easy if it's exclusively part of this
2293 * transaction. But if it's part of the committing
2294 * transaction then journal_forget() will simply
2295 * brelse() it. That means that if the underlying
2296 * block is reallocated in ext3_get_block(),
2297 * unmap_underlying_metadata() will find this block
2298 * and will try to get rid of it. damn, damn.
2299 *
2300 * If this block has already been committed to the
2301 * journal, a revoke record will be written. And
2302 * revoke records must be emitted *before* clearing
2303 * this block's bit in the bitmaps.
2304 */
2305 ext3_forget(handle, 1, inode, bh, bh->b_blocknr);
2306
2307 /*
2308 * Everything below this this pointer has been 2316 * Everything below this this pointer has been
2309 * released. Now let this top-of-subtree go. 2317 * released. Now let this top-of-subtree go.
2310 * 2318 *
@@ -2327,6 +2335,31 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
2327 truncate_restart_transaction(handle, inode); 2335 truncate_restart_transaction(handle, inode);
2328 } 2336 }
2329 2337
2338 /*
2339 * We've probably journalled the indirect block several
2340 * times during the truncate. But it's no longer
2341 * needed and we now drop it from the transaction via
2342 * journal_revoke().
2343 *
2344 * That's easy if it's exclusively part of this
2345 * transaction. But if it's part of the committing
2346 * transaction then journal_forget() will simply
2347 * brelse() it. That means that if the underlying
2348 * block is reallocated in ext3_get_block(),
2349 * unmap_underlying_metadata() will find this block
2350 * and will try to get rid of it. damn, damn. Thus
2351 * we don't allow a block to be reallocated until
2352 * a transaction freeing it has fully committed.
2353 *
2354 * We also have to make sure journal replay after a
2355 * crash does not overwrite non-journaled data blocks
2356 * with old metadata when the block got reallocated for
2357 * data. Thus we have to store a revoke record for a
2358 * block in the same transaction in which we free the
2359 * block.
2360 */
2361 ext3_forget(handle, 1, inode, bh, bh->b_blocknr);
2362
2330 ext3_free_blocks(handle, inode, nr, 1); 2363 ext3_free_blocks(handle, inode, nr, 1);
2331 2364
2332 if (parent_bh) { 2365 if (parent_bh) {
@@ -2554,7 +2587,7 @@ out_stop:
2554 * If this was a simple ftruncate(), and the file will remain alive 2587 * If this was a simple ftruncate(), and the file will remain alive
2555 * then we need to clear up the orphan record which we created above. 2588 * then we need to clear up the orphan record which we created above.
2556 * However, if this was a real unlink then we were called by 2589 * However, if this was a real unlink then we were called by
2557 * ext3_delete_inode(), and we allow that function to clean up the 2590 * ext3_evict_inode(), and we allow that function to clean up the
2558 * orphan info for us. 2591 * orphan info for us.
2559 */ 2592 */
2560 if (inode->i_nlink) 2593 if (inode->i_nlink)
@@ -3198,9 +3231,17 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
3198 ext3_journal_stop(handle); 3231 ext3_journal_stop(handle);
3199 } 3232 }
3200 3233
3201 rc = inode_setattr(inode, attr); 3234 if ((attr->ia_valid & ATTR_SIZE) &&
3235 attr->ia_size != i_size_read(inode)) {
3236 rc = vmtruncate(inode, attr->ia_size);
3237 if (rc)
3238 goto err_out;
3239 }
3240
3241 setattr_copy(inode, attr);
3242 mark_inode_dirty(inode);
3202 3243
3203 if (!rc && (ia_valid & ATTR_MODE)) 3244 if (ia_valid & ATTR_MODE)
3204 rc = ext3_acl_chmod(inode); 3245 rc = ext3_acl_chmod(inode);
3205 3246
3206err_out: 3247err_out:
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index ee184084ca42..2b35ddb70d65 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1447,7 +1447,6 @@ static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
1447 struct inode *inode) 1447 struct inode *inode)
1448{ 1448{
1449 struct inode *dir = dentry->d_parent->d_inode; 1449 struct inode *dir = dentry->d_parent->d_inode;
1450 unsigned long offset;
1451 struct buffer_head * bh; 1450 struct buffer_head * bh;
1452 struct ext3_dir_entry_2 *de; 1451 struct ext3_dir_entry_2 *de;
1453 struct super_block * sb; 1452 struct super_block * sb;
@@ -1469,7 +1468,7 @@ static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
1469 ext3_mark_inode_dirty(handle, dir); 1468 ext3_mark_inode_dirty(handle, dir);
1470 } 1469 }
1471 blocks = dir->i_size >> sb->s_blocksize_bits; 1470 blocks = dir->i_size >> sb->s_blocksize_bits;
1472 for (block = 0, offset = 0; block < blocks; block++) { 1471 for (block = 0; block < blocks; block++) {
1473 bh = ext3_bread(handle, dir, block, 0, &retval); 1472 bh = ext3_bread(handle, dir, block, 0, &retval);
1474 if(!bh) 1473 if(!bh)
1475 return retval; 1474 return retval;
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 54351ac7cef9..0ccd7b12b73c 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -964,7 +964,6 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
964 ext3_fsblk_t n_blocks_count) 964 ext3_fsblk_t n_blocks_count)
965{ 965{
966 ext3_fsblk_t o_blocks_count; 966 ext3_fsblk_t o_blocks_count;
967 unsigned long o_groups_count;
968 ext3_grpblk_t last; 967 ext3_grpblk_t last;
969 ext3_grpblk_t add; 968 ext3_grpblk_t add;
970 struct buffer_head * bh; 969 struct buffer_head * bh;
@@ -976,7 +975,6 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
976 * yet: we're going to revalidate es->s_blocks_count after 975 * yet: we're going to revalidate es->s_blocks_count after
977 * taking the s_resize_lock below. */ 976 * taking the s_resize_lock below. */
978 o_blocks_count = le32_to_cpu(es->s_blocks_count); 977 o_blocks_count = le32_to_cpu(es->s_blocks_count);
979 o_groups_count = EXT3_SB(sb)->s_groups_count;
980 978
981 if (test_opt(sb, DEBUG)) 979 if (test_opt(sb, DEBUG))
982 printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK" uto "E3FSBLK" blocks\n", 980 printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK" uto "E3FSBLK" blocks\n",
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 6c953bb255e7..5dbf4dba03c4 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -527,17 +527,6 @@ static void destroy_inodecache(void)
527 kmem_cache_destroy(ext3_inode_cachep); 527 kmem_cache_destroy(ext3_inode_cachep);
528} 528}
529 529
530static void ext3_clear_inode(struct inode *inode)
531{
532 struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info;
533
534 dquot_drop(inode);
535 ext3_discard_reservation(inode);
536 EXT3_I(inode)->i_block_alloc_info = NULL;
537 if (unlikely(rsv))
538 kfree(rsv);
539}
540
541static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb) 530static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb)
542{ 531{
543#if defined(CONFIG_QUOTA) 532#if defined(CONFIG_QUOTA)
@@ -661,9 +650,6 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
661 */ 650 */
662 seq_puts(seq, ",barrier="); 651 seq_puts(seq, ",barrier=");
663 seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); 652 seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
664 if (test_opt(sb, NOBH))
665 seq_puts(seq, ",nobh");
666
667 seq_printf(seq, ",data=%s", data_mode_string(test_opt(sb, DATA_FLAGS))); 653 seq_printf(seq, ",data=%s", data_mode_string(test_opt(sb, DATA_FLAGS)));
668 if (test_opt(sb, DATA_ERR_ABORT)) 654 if (test_opt(sb, DATA_ERR_ABORT))
669 seq_puts(seq, ",data_err=abort"); 655 seq_puts(seq, ",data_err=abort");
@@ -783,14 +769,13 @@ static const struct super_operations ext3_sops = {
783 .destroy_inode = ext3_destroy_inode, 769 .destroy_inode = ext3_destroy_inode,
784 .write_inode = ext3_write_inode, 770 .write_inode = ext3_write_inode,
785 .dirty_inode = ext3_dirty_inode, 771 .dirty_inode = ext3_dirty_inode,
786 .delete_inode = ext3_delete_inode, 772 .evict_inode = ext3_evict_inode,
787 .put_super = ext3_put_super, 773 .put_super = ext3_put_super,
788 .sync_fs = ext3_sync_fs, 774 .sync_fs = ext3_sync_fs,
789 .freeze_fs = ext3_freeze, 775 .freeze_fs = ext3_freeze,
790 .unfreeze_fs = ext3_unfreeze, 776 .unfreeze_fs = ext3_unfreeze,
791 .statfs = ext3_statfs, 777 .statfs = ext3_statfs,
792 .remount_fs = ext3_remount, 778 .remount_fs = ext3_remount,
793 .clear_inode = ext3_clear_inode,
794 .show_options = ext3_show_options, 779 .show_options = ext3_show_options,
795#ifdef CONFIG_QUOTA 780#ifdef CONFIG_QUOTA
796 .quota_read = ext3_quota_read, 781 .quota_read = ext3_quota_read,
@@ -1255,10 +1240,12 @@ set_qf_format:
1255 *n_blocks_count = option; 1240 *n_blocks_count = option;
1256 break; 1241 break;
1257 case Opt_nobh: 1242 case Opt_nobh:
1258 set_opt(sbi->s_mount_opt, NOBH); 1243 ext3_msg(sb, KERN_WARNING,
1244 "warning: ignoring deprecated nobh option");
1259 break; 1245 break;
1260 case Opt_bh: 1246 case Opt_bh:
1261 clear_opt(sbi->s_mount_opt, NOBH); 1247 ext3_msg(sb, KERN_WARNING,
1248 "warning: ignoring deprecated bh option");
1262 break; 1249 break;
1263 default: 1250 default:
1264 ext3_msg(sb, KERN_ERR, 1251 ext3_msg(sb, KERN_ERR,
@@ -2001,14 +1988,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
2001 break; 1988 break;
2002 } 1989 }
2003 1990
2004 if (test_opt(sb, NOBH)) {
2005 if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) {
2006 ext3_msg(sb, KERN_WARNING,
2007 "warning: ignoring nobh option - "
2008 "it is supported only with writeback mode");
2009 clear_opt(sbi->s_mount_opt, NOBH);
2010 }
2011 }
2012 /* 1991 /*
2013 * The journal_load will have done any necessary log recovery, 1992 * The journal_load will have done any necessary log recovery,
2014 * so we can safely mount the rest of the filesystem now. 1993 * so we can safely mount the rest of the filesystem now.
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 71fb8d65e54c..e69dc6dfaa89 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -1139,7 +1139,7 @@ ext3_xattr_cache_insert(struct buffer_head *bh)
1139 ea_bdebug(bh, "out of memory"); 1139 ea_bdebug(bh, "out of memory");
1140 return; 1140 return;
1141 } 1141 }
1142 error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash); 1142 error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
1143 if (error) { 1143 if (error) {
1144 mb_cache_entry_free(ce); 1144 mb_cache_entry_free(ce);
1145 if (error == -EBUSY) { 1145 if (error == -EBUSY) {
@@ -1211,8 +1211,8 @@ ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header,
1211 return NULL; /* never share */ 1211 return NULL; /* never share */
1212 ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); 1212 ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
1213again: 1213again:
1214 ce = mb_cache_entry_find_first(ext3_xattr_cache, 0, 1214 ce = mb_cache_entry_find_first(ext3_xattr_cache, inode->i_sb->s_bdev,
1215 inode->i_sb->s_bdev, hash); 1215 hash);
1216 while (ce) { 1216 while (ce) {
1217 struct buffer_head *bh; 1217 struct buffer_head *bh;
1218 1218
@@ -1237,7 +1237,7 @@ again:
1237 return bh; 1237 return bh;
1238 } 1238 }
1239 brelse(bh); 1239 brelse(bh);
1240 ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); 1240 ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
1241 } 1241 }
1242 return NULL; 1242 return NULL;
1243} 1243}
@@ -1313,9 +1313,7 @@ static void ext3_xattr_rehash(struct ext3_xattr_header *header,
1313int __init 1313int __init
1314init_ext3_xattr(void) 1314init_ext3_xattr(void)
1315{ 1315{
1316 ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL, 1316 ext3_xattr_cache = mb_cache_create("ext3_xattr", 6);
1317 sizeof(struct mb_cache_entry) +
1318 sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6);
1319 if (!ext3_xattr_cache) 1317 if (!ext3_xattr_cache)
1320 return -ENOMEM; 1318 return -ENOMEM;
1321 return 0; 1319 return 0;
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index feaf498feaa6..5e2ed4504ead 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -204,6 +204,7 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
204 return error; 204 return error;
205 else { 205 else {
206 inode->i_mode = mode; 206 inode->i_mode = mode;
207 inode->i_ctime = ext4_current_time(inode);
207 ext4_mark_inode_dirty(handle, inode); 208 ext4_mark_inode_dirty(handle, inode);
208 if (error == 0) 209 if (error == 0)
209 acl = NULL; 210 acl = NULL;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 95b7594c76f9..bd30799a43ed 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -377,14 +377,11 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
377 ext4_grpblk_t bit; 377 ext4_grpblk_t bit;
378 unsigned int i; 378 unsigned int i;
379 struct ext4_group_desc *desc; 379 struct ext4_group_desc *desc;
380 struct ext4_super_block *es; 380 struct ext4_sb_info *sbi = EXT4_SB(sb);
381 struct ext4_sb_info *sbi;
382 int err = 0, ret, blk_free_count; 381 int err = 0, ret, blk_free_count;
383 ext4_grpblk_t blocks_freed; 382 ext4_grpblk_t blocks_freed;
384 struct ext4_group_info *grp; 383 struct ext4_group_info *grp;
385 384
386 sbi = EXT4_SB(sb);
387 es = sbi->s_es;
388 ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1); 385 ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
389 386
390 ext4_get_group_no_and_offset(sb, block, &block_group, &bit); 387 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
@@ -477,7 +474,6 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
477 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh); 474 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
478 if (!err) 475 if (!err)
479 err = ret; 476 err = ret;
480 sb->s_dirt = 1;
481 477
482error_return: 478error_return:
483 brelse(bitmap_bh); 479 brelse(bitmap_bh);
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 5b6973fbf1bd..3db5084db9bd 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -229,16 +229,20 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
229 229
230 if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || 230 if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
231 (start_blk + count < start_blk) || 231 (start_blk + count < start_blk) ||
232 (start_blk + count > ext4_blocks_count(sbi->s_es))) 232 (start_blk + count > ext4_blocks_count(sbi->s_es))) {
233 sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
233 return 0; 234 return 0;
235 }
234 while (n) { 236 while (n) {
235 entry = rb_entry(n, struct ext4_system_zone, node); 237 entry = rb_entry(n, struct ext4_system_zone, node);
236 if (start_blk + count - 1 < entry->start_blk) 238 if (start_blk + count - 1 < entry->start_blk)
237 n = n->rb_left; 239 n = n->rb_left;
238 else if (start_blk >= (entry->start_blk + entry->count)) 240 else if (start_blk >= (entry->start_blk + entry->count))
239 n = n->rb_right; 241 n = n->rb_right;
240 else 242 else {
243 sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
241 return 0; 244 return 0;
245 }
242 } 246 }
243 return 1; 247 return 1;
244} 248}
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index ea5e6cb7e2a5..374510f72baa 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -61,10 +61,11 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
61} 61}
62 62
63 63
64int ext4_check_dir_entry(const char *function, struct inode *dir, 64int __ext4_check_dir_entry(const char *function, unsigned int line,
65 struct ext4_dir_entry_2 *de, 65 struct inode *dir,
66 struct buffer_head *bh, 66 struct ext4_dir_entry_2 *de,
67 unsigned int offset) 67 struct buffer_head *bh,
68 unsigned int offset)
68{ 69{
69 const char *error_msg = NULL; 70 const char *error_msg = NULL;
70 const int rlen = ext4_rec_len_from_disk(de->rec_len, 71 const int rlen = ext4_rec_len_from_disk(de->rec_len,
@@ -83,11 +84,10 @@ int ext4_check_dir_entry(const char *function, struct inode *dir,
83 error_msg = "inode out of bounds"; 84 error_msg = "inode out of bounds";
84 85
85 if (error_msg != NULL) 86 if (error_msg != NULL)
86 ext4_error_inode(function, dir, 87 ext4_error_inode(dir, function, line, bh->b_blocknr,
87 "bad entry in directory: %s - block=%llu" 88 "bad entry in directory: %s - "
88 "offset=%u(%u), inode=%u, rec_len=%d, name_len=%d", 89 "offset=%u(%u), inode=%u, rec_len=%d, name_len=%d",
89 error_msg, (unsigned long long) bh->b_blocknr, 90 error_msg, (unsigned) (offset%bh->b_size), offset,
90 (unsigned) (offset%bh->b_size), offset,
91 le32_to_cpu(de->inode), 91 le32_to_cpu(de->inode),
92 rlen, de->name_len); 92 rlen, de->name_len);
93 return error_msg == NULL ? 1 : 0; 93 return error_msg == NULL ? 1 : 0;
@@ -121,7 +121,8 @@ static int ext4_readdir(struct file *filp,
121 * We don't set the inode dirty flag since it's not 121 * We don't set the inode dirty flag since it's not
122 * critical that it get flushed back to the disk. 122 * critical that it get flushed back to the disk.
123 */ 123 */
124 ext4_clear_inode_flag(filp->f_path.dentry->d_inode, EXT4_INODE_INDEX); 124 ext4_clear_inode_flag(filp->f_path.dentry->d_inode,
125 EXT4_INODE_INDEX);
125 } 126 }
126 stored = 0; 127 stored = 0;
127 offset = filp->f_pos & (sb->s_blocksize - 1); 128 offset = filp->f_pos & (sb->s_blocksize - 1);
@@ -193,7 +194,7 @@ revalidate:
193 while (!error && filp->f_pos < inode->i_size 194 while (!error && filp->f_pos < inode->i_size
194 && offset < sb->s_blocksize) { 195 && offset < sb->s_blocksize) {
195 de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); 196 de = (struct ext4_dir_entry_2 *) (bh->b_data + offset);
196 if (!ext4_check_dir_entry("ext4_readdir", inode, de, 197 if (!ext4_check_dir_entry(inode, de,
197 bh, offset)) { 198 bh, offset)) {
198 /* 199 /*
199 * On error, skip the f_pos to the next block 200 * On error, skip the f_pos to the next block
@@ -343,7 +344,7 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
343 struct dir_private_info *info; 344 struct dir_private_info *info;
344 int len; 345 int len;
345 346
346 info = (struct dir_private_info *) dir_file->private_data; 347 info = dir_file->private_data;
347 p = &info->root.rb_node; 348 p = &info->root.rb_node;
348 349
349 /* Create and allocate the fname structure */ 350 /* Create and allocate the fname structure */
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 19a4de57128a..889ec9d5e6ad 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -57,10 +57,13 @@
57#endif 57#endif
58 58
59#define EXT4_ERROR_INODE(inode, fmt, a...) \ 59#define EXT4_ERROR_INODE(inode, fmt, a...) \
60 ext4_error_inode(__func__, (inode), (fmt), ## a) 60 ext4_error_inode((inode), __func__, __LINE__, 0, (fmt), ## a)
61
62#define EXT4_ERROR_INODE_BLOCK(inode, block, fmt, a...) \
63 ext4_error_inode((inode), __func__, __LINE__, (block), (fmt), ## a)
61 64
62#define EXT4_ERROR_FILE(file, fmt, a...) \ 65#define EXT4_ERROR_FILE(file, fmt, a...) \
63 ext4_error_file(__func__, (file), (fmt), ## a) 66 ext4_error_file(__func__, __LINE__, (file), (fmt), ## a)
64 67
65/* data type for block offset of block group */ 68/* data type for block offset of block group */
66typedef int ext4_grpblk_t; 69typedef int ext4_grpblk_t;
@@ -167,13 +170,15 @@ struct mpage_da_data {
167}; 170};
168#define EXT4_IO_UNWRITTEN 0x1 171#define EXT4_IO_UNWRITTEN 0x1
169typedef struct ext4_io_end { 172typedef struct ext4_io_end {
170 struct list_head list; /* per-file finished AIO list */ 173 struct list_head list; /* per-file finished IO list */
171 struct inode *inode; /* file being written to */ 174 struct inode *inode; /* file being written to */
172 unsigned int flag; /* unwritten or not */ 175 unsigned int flag; /* unwritten or not */
173 struct page *page; /* page struct for buffer write */ 176 struct page *page; /* page struct for buffer write */
174 loff_t offset; /* offset in the file */ 177 loff_t offset; /* offset in the file */
175 ssize_t size; /* size of the extent */ 178 ssize_t size; /* size of the extent */
176 struct work_struct work; /* data work queue */ 179 struct work_struct work; /* data work queue */
180 struct kiocb *iocb; /* iocb struct for AIO */
181 int result; /* error value for AIO */
177} ext4_io_end_t; 182} ext4_io_end_t;
178 183
179/* 184/*
@@ -460,7 +465,7 @@ struct ext4_new_group_data {
460}; 465};
461 466
462/* 467/*
463 * Flags used by ext4_get_blocks() 468 * Flags used by ext4_map_blocks()
464 */ 469 */
465 /* Allocate any needed blocks and/or convert an unitialized 470 /* Allocate any needed blocks and/or convert an unitialized
466 extent to be an initialized ext4 */ 471 extent to be an initialized ext4 */
@@ -873,7 +878,6 @@ struct ext4_inode_info {
873#define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ 878#define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
874#define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */ 879#define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */
875#define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */ 880#define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */
876#define EXT4_MOUNT_NOBH 0x40000 /* No bufferheads */
877#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ 881#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
878#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ 882#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
879#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ 883#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
@@ -982,7 +986,7 @@ struct ext4_super_block {
982 __le32 s_last_orphan; /* start of list of inodes to delete */ 986 __le32 s_last_orphan; /* start of list of inodes to delete */
983 __le32 s_hash_seed[4]; /* HTREE hash seed */ 987 __le32 s_hash_seed[4]; /* HTREE hash seed */
984 __u8 s_def_hash_version; /* Default hash version to use */ 988 __u8 s_def_hash_version; /* Default hash version to use */
985 __u8 s_reserved_char_pad; 989 __u8 s_jnl_backup_type;
986 __le16 s_desc_size; /* size of group descriptor */ 990 __le16 s_desc_size; /* size of group descriptor */
987/*100*/ __le32 s_default_mount_opts; 991/*100*/ __le32 s_default_mount_opts;
988 __le32 s_first_meta_bg; /* First metablock block group */ 992 __le32 s_first_meta_bg; /* First metablock block group */
@@ -1000,12 +1004,34 @@ struct ext4_super_block {
1000 __le64 s_mmp_block; /* Block for multi-mount protection */ 1004 __le64 s_mmp_block; /* Block for multi-mount protection */
1001 __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ 1005 __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
1002 __u8 s_log_groups_per_flex; /* FLEX_BG group size */ 1006 __u8 s_log_groups_per_flex; /* FLEX_BG group size */
1003 __u8 s_reserved_char_pad2; 1007 __u8 s_reserved_char_pad;
1004 __le16 s_reserved_pad; 1008 __le16 s_reserved_pad;
1005 __le64 s_kbytes_written; /* nr of lifetime kilobytes written */ 1009 __le64 s_kbytes_written; /* nr of lifetime kilobytes written */
1006 __u32 s_reserved[160]; /* Padding to the end of the block */ 1010 __le32 s_snapshot_inum; /* Inode number of active snapshot */
1011 __le32 s_snapshot_id; /* sequential ID of active snapshot */
1012 __le64 s_snapshot_r_blocks_count; /* reserved blocks for active
1013 snapshot's future use */
1014 __le32 s_snapshot_list; /* inode number of the head of the
1015 on-disk snapshot list */
1016#define EXT4_S_ERR_START offsetof(struct ext4_super_block, s_error_count)
1017 __le32 s_error_count; /* number of fs errors */
1018 __le32 s_first_error_time; /* first time an error happened */
1019 __le32 s_first_error_ino; /* inode involved in first error */
1020 __le64 s_first_error_block; /* block involved of first error */
1021 __u8 s_first_error_func[32]; /* function where the error happened */
1022 __le32 s_first_error_line; /* line number where error happened */
1023 __le32 s_last_error_time; /* most recent time of an error */
1024 __le32 s_last_error_ino; /* inode involved in last error */
1025 __le32 s_last_error_line; /* line number where error happened */
1026 __le64 s_last_error_block; /* block involved of last error */
1027 __u8 s_last_error_func[32]; /* function where the error happened */
1028#define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts)
1029 __u8 s_mount_opts[64];
1030 __le32 s_reserved[112]; /* Padding to the end of the block */
1007}; 1031};
1008 1032
1033#define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START)
1034
1009#ifdef __KERNEL__ 1035#ifdef __KERNEL__
1010 1036
1011/* 1037/*
@@ -1143,6 +1169,9 @@ struct ext4_sb_info {
1143 1169
1144 /* workqueue for dio unwritten */ 1170 /* workqueue for dio unwritten */
1145 struct workqueue_struct *dio_unwritten_wq; 1171 struct workqueue_struct *dio_unwritten_wq;
1172
1173 /* timer for periodic error stats printing */
1174 struct timer_list s_err_report;
1146}; 1175};
1147 1176
1148static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) 1177static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1313,6 +1342,10 @@ EXT4_INODE_BIT_FNS(state, state_flags)
1313#define EXT4_DEFM_JMODE_DATA 0x0020 1342#define EXT4_DEFM_JMODE_DATA 0x0020
1314#define EXT4_DEFM_JMODE_ORDERED 0x0040 1343#define EXT4_DEFM_JMODE_ORDERED 0x0040
1315#define EXT4_DEFM_JMODE_WBACK 0x0060 1344#define EXT4_DEFM_JMODE_WBACK 0x0060
1345#define EXT4_DEFM_NOBARRIER 0x0100
1346#define EXT4_DEFM_BLOCK_VALIDITY 0x0200
1347#define EXT4_DEFM_DISCARD 0x0400
1348#define EXT4_DEFM_NODELALLOC 0x0800
1316 1349
1317/* 1350/*
1318 * Default journal batch times 1351 * Default journal batch times
@@ -1379,6 +1412,43 @@ struct ext4_dir_entry_2 {
1379#define EXT4_MAX_REC_LEN ((1<<16)-1) 1412#define EXT4_MAX_REC_LEN ((1<<16)-1)
1380 1413
1381/* 1414/*
1415 * If we ever get support for fs block sizes > page_size, we'll need
1416 * to remove the #if statements in the next two functions...
1417 */
1418static inline unsigned int
1419ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
1420{
1421 unsigned len = le16_to_cpu(dlen);
1422
1423#if (PAGE_CACHE_SIZE >= 65536)
1424 if (len == EXT4_MAX_REC_LEN || len == 0)
1425 return blocksize;
1426 return (len & 65532) | ((len & 3) << 16);
1427#else
1428 return len;
1429#endif
1430}
1431
1432static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
1433{
1434 if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3))
1435 BUG();
1436#if (PAGE_CACHE_SIZE >= 65536)
1437 if (len < 65536)
1438 return cpu_to_le16(len);
1439 if (len == blocksize) {
1440 if (blocksize == 65536)
1441 return cpu_to_le16(EXT4_MAX_REC_LEN);
1442 else
1443 return cpu_to_le16(0);
1444 }
1445 return cpu_to_le16((len & 65532) | ((len >> 16) & 3));
1446#else
1447 return cpu_to_le16(len);
1448#endif
1449}
1450
1451/*
1382 * Hash Tree Directory indexing 1452 * Hash Tree Directory indexing
1383 * (c) Daniel Phillips, 2001 1453 * (c) Daniel Phillips, 2001
1384 */ 1454 */
@@ -1510,9 +1580,11 @@ extern unsigned ext4_init_block_bitmap(struct super_block *sb,
1510 ext4_init_block_bitmap(sb, NULL, group, desc) 1580 ext4_init_block_bitmap(sb, NULL, group, desc)
1511 1581
1512/* dir.c */ 1582/* dir.c */
1513extern int ext4_check_dir_entry(const char *, struct inode *, 1583extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *,
1514 struct ext4_dir_entry_2 *, 1584 struct ext4_dir_entry_2 *,
1515 struct buffer_head *, unsigned int); 1585 struct buffer_head *, unsigned int);
1586#define ext4_check_dir_entry(dir, de, bh, offset) \
1587 __ext4_check_dir_entry(__func__, __LINE__, (dir), (de), (bh), (offset))
1516extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, 1588extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
1517 __u32 minor_hash, 1589 __u32 minor_hash,
1518 struct ext4_dir_entry_2 *dirent); 1590 struct ext4_dir_entry_2 *dirent);
@@ -1571,7 +1643,8 @@ extern int ext4_write_inode(struct inode *, struct writeback_control *);
1571extern int ext4_setattr(struct dentry *, struct iattr *); 1643extern int ext4_setattr(struct dentry *, struct iattr *);
1572extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, 1644extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
1573 struct kstat *stat); 1645 struct kstat *stat);
1574extern void ext4_delete_inode(struct inode *); 1646extern void ext4_evict_inode(struct inode *);
1647extern void ext4_clear_inode(struct inode *);
1575extern int ext4_sync_inode(handle_t *, struct inode *); 1648extern int ext4_sync_inode(handle_t *, struct inode *);
1576extern void ext4_dirty_inode(struct inode *); 1649extern void ext4_dirty_inode(struct inode *);
1577extern int ext4_change_inode_journal_flag(struct inode *, int); 1650extern int ext4_change_inode_journal_flag(struct inode *, int);
@@ -1601,8 +1674,6 @@ extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
1601extern int ext4_ext_migrate(struct inode *); 1674extern int ext4_ext_migrate(struct inode *);
1602 1675
1603/* namei.c */ 1676/* namei.c */
1604extern unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize);
1605extern __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize);
1606extern int ext4_orphan_add(handle_t *, struct inode *); 1677extern int ext4_orphan_add(handle_t *, struct inode *);
1607extern int ext4_orphan_del(handle_t *, struct inode *); 1678extern int ext4_orphan_del(handle_t *, struct inode *);
1608extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, 1679extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
@@ -1616,25 +1687,38 @@ extern int ext4_group_extend(struct super_block *sb,
1616 ext4_fsblk_t n_blocks_count); 1687 ext4_fsblk_t n_blocks_count);
1617 1688
1618/* super.c */ 1689/* super.c */
1619extern void __ext4_error(struct super_block *, const char *, const char *, ...) 1690extern void __ext4_error(struct super_block *, const char *, unsigned int,
1620 __attribute__ ((format (printf, 3, 4))); 1691 const char *, ...)
1621#define ext4_error(sb, message...) __ext4_error(sb, __func__, ## message) 1692 __attribute__ ((format (printf, 4, 5)));
1622extern void ext4_error_inode(const char *, struct inode *, const char *, ...) 1693#define ext4_error(sb, message...) __ext4_error(sb, __func__, \
1623 __attribute__ ((format (printf, 3, 4))); 1694 __LINE__, ## message)
1624extern void ext4_error_file(const char *, struct file *, const char *, ...) 1695extern void ext4_error_inode(struct inode *, const char *, unsigned int,
1625 __attribute__ ((format (printf, 3, 4))); 1696 ext4_fsblk_t, const char *, ...)
1626extern void __ext4_std_error(struct super_block *, const char *, int); 1697 __attribute__ ((format (printf, 5, 6)));
1627extern void ext4_abort(struct super_block *, const char *, const char *, ...) 1698extern void ext4_error_file(struct file *, const char *, unsigned int,
1628 __attribute__ ((format (printf, 3, 4))); 1699 const char *, ...)
1629extern void __ext4_warning(struct super_block *, const char *, 1700 __attribute__ ((format (printf, 4, 5)));
1701extern void __ext4_std_error(struct super_block *, const char *,
1702 unsigned int, int);
1703extern void __ext4_abort(struct super_block *, const char *, unsigned int,
1704 const char *, ...)
1705 __attribute__ ((format (printf, 4, 5)));
1706#define ext4_abort(sb, message...) __ext4_abort(sb, __func__, \
1707 __LINE__, ## message)
1708extern void __ext4_warning(struct super_block *, const char *, unsigned int,
1630 const char *, ...) 1709 const char *, ...)
1631 __attribute__ ((format (printf, 3, 4))); 1710 __attribute__ ((format (printf, 4, 5)));
1632#define ext4_warning(sb, message...) __ext4_warning(sb, __func__, ## message) 1711#define ext4_warning(sb, message...) __ext4_warning(sb, __func__, \
1712 __LINE__, ## message)
1633extern void ext4_msg(struct super_block *, const char *, const char *, ...) 1713extern void ext4_msg(struct super_block *, const char *, const char *, ...)
1634 __attribute__ ((format (printf, 3, 4))); 1714 __attribute__ ((format (printf, 3, 4)));
1635extern void ext4_grp_locked_error(struct super_block *, ext4_group_t, 1715extern void __ext4_grp_locked_error(const char *, unsigned int, \
1636 const char *, const char *, ...) 1716 struct super_block *, ext4_group_t, \
1637 __attribute__ ((format (printf, 4, 5))); 1717 unsigned long, ext4_fsblk_t, \
1718 const char *, ...)
1719 __attribute__ ((format (printf, 7, 8)));
1720#define ext4_grp_locked_error(sb, grp, message...) \
1721 __ext4_grp_locked_error(__func__, __LINE__, (sb), (grp), ## message)
1638extern void ext4_update_dynamic_rev(struct super_block *sb); 1722extern void ext4_update_dynamic_rev(struct super_block *sb);
1639extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, 1723extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
1640 __u32 compat); 1724 __u32 compat);
@@ -1768,7 +1852,7 @@ static inline unsigned int ext4_flex_bg_size(struct ext4_sb_info *sbi)
1768#define ext4_std_error(sb, errno) \ 1852#define ext4_std_error(sb, errno) \
1769do { \ 1853do { \
1770 if ((errno)) \ 1854 if ((errno)) \
1771 __ext4_std_error((sb), __func__, (errno)); \ 1855 __ext4_std_error((sb), __func__, __LINE__, (errno)); \
1772} while (0) 1856} while (0)
1773 1857
1774#ifdef CONFIG_SMP 1858#ifdef CONFIG_SMP
@@ -1860,6 +1944,12 @@ static inline void ext4_unlock_group(struct super_block *sb,
1860 spin_unlock(ext4_group_lock_ptr(sb, group)); 1944 spin_unlock(ext4_group_lock_ptr(sb, group));
1861} 1945}
1862 1946
1947static inline void ext4_mark_super_dirty(struct super_block *sb)
1948{
1949 if (EXT4_SB(sb)->s_journal == NULL)
1950 sb->s_dirt =1;
1951}
1952
1863/* 1953/*
1864 * Inodes and files operations 1954 * Inodes and files operations
1865 */ 1955 */
@@ -1905,9 +1995,6 @@ extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
1905 ssize_t len); 1995 ssize_t len);
1906extern int ext4_map_blocks(handle_t *handle, struct inode *inode, 1996extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
1907 struct ext4_map_blocks *map, int flags); 1997 struct ext4_map_blocks *map, int flags);
1908extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
1909 sector_t block, unsigned int max_blocks,
1910 struct buffer_head *bh, int flags);
1911extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 1998extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1912 __u64 start, __u64 len); 1999 __u64 start, __u64 len);
1913/* move_extent.c */ 2000/* move_extent.c */
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 53d2764d71ca..6e272ef6ba96 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -6,29 +6,29 @@
6 6
7#include <trace/events/ext4.h> 7#include <trace/events/ext4.h>
8 8
9int __ext4_journal_get_undo_access(const char *where, handle_t *handle, 9int __ext4_journal_get_undo_access(const char *where, unsigned int line,
10 struct buffer_head *bh) 10 handle_t *handle, struct buffer_head *bh)
11{ 11{
12 int err = 0; 12 int err = 0;
13 13
14 if (ext4_handle_valid(handle)) { 14 if (ext4_handle_valid(handle)) {
15 err = jbd2_journal_get_undo_access(handle, bh); 15 err = jbd2_journal_get_undo_access(handle, bh);
16 if (err) 16 if (err)
17 ext4_journal_abort_handle(where, __func__, bh, 17 ext4_journal_abort_handle(where, line, __func__, bh,
18 handle, err); 18 handle, err);
19 } 19 }
20 return err; 20 return err;
21} 21}
22 22
23int __ext4_journal_get_write_access(const char *where, handle_t *handle, 23int __ext4_journal_get_write_access(const char *where, unsigned int line,
24 struct buffer_head *bh) 24 handle_t *handle, struct buffer_head *bh)
25{ 25{
26 int err = 0; 26 int err = 0;
27 27
28 if (ext4_handle_valid(handle)) { 28 if (ext4_handle_valid(handle)) {
29 err = jbd2_journal_get_write_access(handle, bh); 29 err = jbd2_journal_get_write_access(handle, bh);
30 if (err) 30 if (err)
31 ext4_journal_abort_handle(where, __func__, bh, 31 ext4_journal_abort_handle(where, line, __func__, bh,
32 handle, err); 32 handle, err);
33 } 33 }
34 return err; 34 return err;
@@ -46,9 +46,9 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle,
46 * If the handle isn't valid we're not journaling, but we still need to 46 * If the handle isn't valid we're not journaling, but we still need to
47 * call into ext4_journal_revoke() to put the buffer head. 47 * call into ext4_journal_revoke() to put the buffer head.
48 */ 48 */
49int __ext4_forget(const char *where, handle_t *handle, int is_metadata, 49int __ext4_forget(const char *where, unsigned int line, handle_t *handle,
50 struct inode *inode, struct buffer_head *bh, 50 int is_metadata, struct inode *inode,
51 ext4_fsblk_t blocknr) 51 struct buffer_head *bh, ext4_fsblk_t blocknr)
52{ 52{
53 int err; 53 int err;
54 54
@@ -79,8 +79,8 @@ int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
79 BUFFER_TRACE(bh, "call jbd2_journal_forget"); 79 BUFFER_TRACE(bh, "call jbd2_journal_forget");
80 err = jbd2_journal_forget(handle, bh); 80 err = jbd2_journal_forget(handle, bh);
81 if (err) 81 if (err)
82 ext4_journal_abort_handle(where, __func__, bh, 82 ext4_journal_abort_handle(where, line, __func__,
83 handle, err); 83 bh, handle, err);
84 return err; 84 return err;
85 } 85 }
86 return 0; 86 return 0;
@@ -92,15 +92,16 @@ int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
92 BUFFER_TRACE(bh, "call jbd2_journal_revoke"); 92 BUFFER_TRACE(bh, "call jbd2_journal_revoke");
93 err = jbd2_journal_revoke(handle, blocknr, bh); 93 err = jbd2_journal_revoke(handle, blocknr, bh);
94 if (err) { 94 if (err) {
95 ext4_journal_abort_handle(where, __func__, bh, handle, err); 95 ext4_journal_abort_handle(where, line, __func__,
96 ext4_abort(inode->i_sb, __func__, 96 bh, handle, err);
97 __ext4_abort(inode->i_sb, where, line,
97 "error %d when attempting revoke", err); 98 "error %d when attempting revoke", err);
98 } 99 }
99 BUFFER_TRACE(bh, "exit"); 100 BUFFER_TRACE(bh, "exit");
100 return err; 101 return err;
101} 102}
102 103
103int __ext4_journal_get_create_access(const char *where, 104int __ext4_journal_get_create_access(const char *where, unsigned int line,
104 handle_t *handle, struct buffer_head *bh) 105 handle_t *handle, struct buffer_head *bh)
105{ 106{
106 int err = 0; 107 int err = 0;
@@ -108,22 +109,23 @@ int __ext4_journal_get_create_access(const char *where,
108 if (ext4_handle_valid(handle)) { 109 if (ext4_handle_valid(handle)) {
109 err = jbd2_journal_get_create_access(handle, bh); 110 err = jbd2_journal_get_create_access(handle, bh);
110 if (err) 111 if (err)
111 ext4_journal_abort_handle(where, __func__, bh, 112 ext4_journal_abort_handle(where, line, __func__,
112 handle, err); 113 bh, handle, err);
113 } 114 }
114 return err; 115 return err;
115} 116}
116 117
117int __ext4_handle_dirty_metadata(const char *where, handle_t *handle, 118int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
118 struct inode *inode, struct buffer_head *bh) 119 handle_t *handle, struct inode *inode,
120 struct buffer_head *bh)
119{ 121{
120 int err = 0; 122 int err = 0;
121 123
122 if (ext4_handle_valid(handle)) { 124 if (ext4_handle_valid(handle)) {
123 err = jbd2_journal_dirty_metadata(handle, bh); 125 err = jbd2_journal_dirty_metadata(handle, bh);
124 if (err) 126 if (err)
125 ext4_journal_abort_handle(where, __func__, bh, 127 ext4_journal_abort_handle(where, line, __func__,
126 handle, err); 128 bh, handle, err);
127 } else { 129 } else {
128 if (inode) 130 if (inode)
129 mark_buffer_dirty_inode(bh, inode); 131 mark_buffer_dirty_inode(bh, inode);
@@ -132,14 +134,33 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
132 if (inode && inode_needs_sync(inode)) { 134 if (inode && inode_needs_sync(inode)) {
133 sync_dirty_buffer(bh); 135 sync_dirty_buffer(bh);
134 if (buffer_req(bh) && !buffer_uptodate(bh)) { 136 if (buffer_req(bh) && !buffer_uptodate(bh)) {
135 ext4_error(inode->i_sb, 137 struct ext4_super_block *es;
136 "IO error syncing inode, " 138
137 "inode=%lu, block=%llu", 139 es = EXT4_SB(inode->i_sb)->s_es;
138 inode->i_ino, 140 es->s_last_error_block =
139 (unsigned long long) bh->b_blocknr); 141 cpu_to_le64(bh->b_blocknr);
142 ext4_error_inode(inode, where, line,
143 bh->b_blocknr,
144 "IO error syncing itable block");
140 err = -EIO; 145 err = -EIO;
141 } 146 }
142 } 147 }
143 } 148 }
144 return err; 149 return err;
145} 150}
151
152int __ext4_handle_dirty_super(const char *where, unsigned int line,
153 handle_t *handle, struct super_block *sb)
154{
155 struct buffer_head *bh = EXT4_SB(sb)->s_sbh;
156 int err = 0;
157
158 if (ext4_handle_valid(handle)) {
159 err = jbd2_journal_dirty_metadata(handle, bh);
160 if (err)
161 ext4_journal_abort_handle(where, line, __func__,
162 bh, handle, err);
163 } else
164 sb->s_dirt = 1;
165 return err;
166}
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index dade0c024797..b0bd792c58c5 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -122,39 +122,47 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);
122/* 122/*
123 * Wrapper functions with which ext4 calls into JBD. 123 * Wrapper functions with which ext4 calls into JBD.
124 */ 124 */
125void ext4_journal_abort_handle(const char *caller, const char *err_fn, 125void ext4_journal_abort_handle(const char *caller, unsigned int line,
126 const char *err_fn,
126 struct buffer_head *bh, handle_t *handle, int err); 127 struct buffer_head *bh, handle_t *handle, int err);
127 128
128int __ext4_journal_get_undo_access(const char *where, handle_t *handle, 129int __ext4_journal_get_undo_access(const char *where, unsigned int line,
129 struct buffer_head *bh); 130 handle_t *handle, struct buffer_head *bh);
130 131
131int __ext4_journal_get_write_access(const char *where, handle_t *handle, 132int __ext4_journal_get_write_access(const char *where, unsigned int line,
132 struct buffer_head *bh); 133 handle_t *handle, struct buffer_head *bh);
133 134
134int __ext4_forget(const char *where, handle_t *handle, int is_metadata, 135int __ext4_forget(const char *where, unsigned int line, handle_t *handle,
135 struct inode *inode, struct buffer_head *bh, 136 int is_metadata, struct inode *inode,
136 ext4_fsblk_t blocknr); 137 struct buffer_head *bh, ext4_fsblk_t blocknr);
137 138
138int __ext4_journal_get_create_access(const char *where, 139int __ext4_journal_get_create_access(const char *where, unsigned int line,
139 handle_t *handle, struct buffer_head *bh); 140 handle_t *handle, struct buffer_head *bh);
140 141
141int __ext4_handle_dirty_metadata(const char *where, handle_t *handle, 142int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
142 struct inode *inode, struct buffer_head *bh); 143 handle_t *handle, struct inode *inode,
144 struct buffer_head *bh);
145
146int __ext4_handle_dirty_super(const char *where, unsigned int line,
147 handle_t *handle, struct super_block *sb);
143 148
144#define ext4_journal_get_undo_access(handle, bh) \ 149#define ext4_journal_get_undo_access(handle, bh) \
145 __ext4_journal_get_undo_access(__func__, (handle), (bh)) 150 __ext4_journal_get_undo_access(__func__, __LINE__, (handle), (bh))
146#define ext4_journal_get_write_access(handle, bh) \ 151#define ext4_journal_get_write_access(handle, bh) \
147 __ext4_journal_get_write_access(__func__, (handle), (bh)) 152 __ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh))
148#define ext4_forget(handle, is_metadata, inode, bh, block_nr) \ 153#define ext4_forget(handle, is_metadata, inode, bh, block_nr) \
149 __ext4_forget(__func__, (handle), (is_metadata), (inode), (bh),\ 154 __ext4_forget(__func__, __LINE__, (handle), (is_metadata), (inode), \
150 (block_nr)) 155 (bh), (block_nr))
151#define ext4_journal_get_create_access(handle, bh) \ 156#define ext4_journal_get_create_access(handle, bh) \
152 __ext4_journal_get_create_access(__func__, (handle), (bh)) 157 __ext4_journal_get_create_access(__func__, __LINE__, (handle), (bh))
153#define ext4_handle_dirty_metadata(handle, inode, bh) \ 158#define ext4_handle_dirty_metadata(handle, inode, bh) \
154 __ext4_handle_dirty_metadata(__func__, (handle), (inode), (bh)) 159 __ext4_handle_dirty_metadata(__func__, __LINE__, (handle), (inode), \
160 (bh))
161#define ext4_handle_dirty_super(handle, sb) \
162 __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb))
155 163
156handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); 164handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
157int __ext4_journal_stop(const char *where, handle_t *handle); 165int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle);
158 166
159#define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096) 167#define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096)
160 168
@@ -207,7 +215,7 @@ static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
207} 215}
208 216
209#define ext4_journal_stop(handle) \ 217#define ext4_journal_stop(handle) \
210 __ext4_journal_stop(__func__, (handle)) 218 __ext4_journal_stop(__func__, __LINE__, (handle))
211 219
212static inline handle_t *ext4_journal_current_handle(void) 220static inline handle_t *ext4_journal_current_handle(void)
213{ 221{
@@ -308,17 +316,15 @@ static inline int ext4_should_writeback_data(struct inode *inode)
308 * This function controls whether or not we should try to go down the 316 * This function controls whether or not we should try to go down the
309 * dioread_nolock code paths, which makes it safe to avoid taking 317 * dioread_nolock code paths, which makes it safe to avoid taking
310 * i_mutex for direct I/O reads. This only works for extent-based 318 * i_mutex for direct I/O reads. This only works for extent-based
311 * files, and it doesn't work for nobh or if data journaling is 319 * files, and it doesn't work if data journaling is enabled, since the
312 * enabled, since the dioread_nolock code uses b_private to pass 320 * dioread_nolock code uses b_private to pass information back to the
313 * information back to the I/O completion handler, and this conflicts 321 * I/O completion handler, and this conflicts with the jbd's use of
314 * with the jbd's use of b_private. 322 * b_private.
315 */ 323 */
316static inline int ext4_should_dioread_nolock(struct inode *inode) 324static inline int ext4_should_dioread_nolock(struct inode *inode)
317{ 325{
318 if (!test_opt(inode->i_sb, DIOREAD_NOLOCK)) 326 if (!test_opt(inode->i_sb, DIOREAD_NOLOCK))
319 return 0; 327 return 0;
320 if (test_opt(inode->i_sb, NOBH))
321 return 0;
322 if (!S_ISREG(inode->i_mode)) 328 if (!S_ISREG(inode->i_mode))
323 return 0; 329 return 0;
324 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 330 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 377309c1af65..06328d3e5717 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -401,9 +401,9 @@ static int ext4_valid_extent_entries(struct inode *inode,
401 return 1; 401 return 1;
402} 402}
403 403
404static int __ext4_ext_check(const char *function, struct inode *inode, 404static int __ext4_ext_check(const char *function, unsigned int line,
405 struct ext4_extent_header *eh, 405 struct inode *inode, struct ext4_extent_header *eh,
406 int depth) 406 int depth)
407{ 407{
408 const char *error_msg; 408 const char *error_msg;
409 int max = 0; 409 int max = 0;
@@ -436,7 +436,7 @@ static int __ext4_ext_check(const char *function, struct inode *inode,
436 return 0; 436 return 0;
437 437
438corrupted: 438corrupted:
439 ext4_error_inode(function, inode, 439 ext4_error_inode(inode, function, line, 0,
440 "bad header/extent: %s - magic %x, " 440 "bad header/extent: %s - magic %x, "
441 "entries %u, max %u(%u), depth %u(%u)", 441 "entries %u, max %u(%u), depth %u(%u)",
442 error_msg, le16_to_cpu(eh->eh_magic), 442 error_msg, le16_to_cpu(eh->eh_magic),
@@ -447,7 +447,7 @@ corrupted:
447} 447}
448 448
449#define ext4_ext_check(inode, eh, depth) \ 449#define ext4_ext_check(inode, eh, depth) \
450 __ext4_ext_check(__func__, inode, eh, depth) 450 __ext4_ext_check(__func__, __LINE__, inode, eh, depth)
451 451
452int ext4_ext_check_inode(struct inode *inode) 452int ext4_ext_check_inode(struct inode *inode)
453{ 453{
@@ -1083,7 +1083,6 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
1083{ 1083{
1084 struct ext4_ext_path *curp = path; 1084 struct ext4_ext_path *curp = path;
1085 struct ext4_extent_header *neh; 1085 struct ext4_extent_header *neh;
1086 struct ext4_extent_idx *fidx;
1087 struct buffer_head *bh; 1086 struct buffer_head *bh;
1088 ext4_fsblk_t newblock; 1087 ext4_fsblk_t newblock;
1089 int err = 0; 1088 int err = 0;
@@ -1144,10 +1143,10 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
1144 ext4_idx_store_pblock(curp->p_idx, newblock); 1143 ext4_idx_store_pblock(curp->p_idx, newblock);
1145 1144
1146 neh = ext_inode_hdr(inode); 1145 neh = ext_inode_hdr(inode);
1147 fidx = EXT_FIRST_INDEX(neh);
1148 ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", 1146 ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
1149 le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), 1147 le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
1150 le32_to_cpu(fidx->ei_block), idx_pblock(fidx)); 1148 le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
1149 idx_pblock(EXT_FIRST_INDEX(neh)));
1151 1150
1152 neh->eh_depth = cpu_to_le16(path->p_depth + 1); 1151 neh->eh_depth = cpu_to_le16(path->p_depth + 1);
1153 err = ext4_ext_dirty(handle, inode, curp); 1152 err = ext4_ext_dirty(handle, inode, curp);
@@ -2937,7 +2936,7 @@ fix_extent_len:
2937 * One of more index blocks maybe needed if the extent tree grow after 2936 * One of more index blocks maybe needed if the extent tree grow after
2938 * the unintialized extent split. To prevent ENOSPC occur at the IO 2937 * the unintialized extent split. To prevent ENOSPC occur at the IO
2939 * complete, we need to split the uninitialized extent before DIO submit 2938 * complete, we need to split the uninitialized extent before DIO submit
2940 * the IO. The uninitilized extent called at this time will be split 2939 * the IO. The uninitialized extent called at this time will be split
2941 * into three uninitialized extent(at most). After IO complete, the part 2940 * into three uninitialized extent(at most). After IO complete, the part
2942 * being filled will be convert to initialized by the end_io callback function 2941 * being filled will be convert to initialized by the end_io callback function
2943 * via ext4_convert_unwritten_extents(). 2942 * via ext4_convert_unwritten_extents().
@@ -2954,7 +2953,6 @@ static int ext4_split_unwritten_extents(handle_t *handle,
2954 struct ext4_extent *ex1 = NULL; 2953 struct ext4_extent *ex1 = NULL;
2955 struct ext4_extent *ex2 = NULL; 2954 struct ext4_extent *ex2 = NULL;
2956 struct ext4_extent *ex3 = NULL; 2955 struct ext4_extent *ex3 = NULL;
2957 struct ext4_extent_header *eh;
2958 ext4_lblk_t ee_block, eof_block; 2956 ext4_lblk_t ee_block, eof_block;
2959 unsigned int allocated, ee_len, depth; 2957 unsigned int allocated, ee_len, depth;
2960 ext4_fsblk_t newblock; 2958 ext4_fsblk_t newblock;
@@ -2971,7 +2969,6 @@ static int ext4_split_unwritten_extents(handle_t *handle,
2971 eof_block = map->m_lblk + map->m_len; 2969 eof_block = map->m_lblk + map->m_len;
2972 2970
2973 depth = ext_depth(inode); 2971 depth = ext_depth(inode);
2974 eh = path[depth].p_hdr;
2975 ex = path[depth].p_ext; 2972 ex = path[depth].p_ext;
2976 ee_block = le32_to_cpu(ex->ee_block); 2973 ee_block = le32_to_cpu(ex->ee_block);
2977 ee_len = ext4_ext_get_actual_len(ex); 2974 ee_len = ext4_ext_get_actual_len(ex);
@@ -3058,7 +3055,6 @@ static int ext4_split_unwritten_extents(handle_t *handle,
3058 err = PTR_ERR(path); 3055 err = PTR_ERR(path);
3059 goto out; 3056 goto out;
3060 } 3057 }
3061 eh = path[depth].p_hdr;
3062 ex = path[depth].p_ext; 3058 ex = path[depth].p_ext;
3063 if (ex2 != &newex) 3059 if (ex2 != &newex)
3064 ex2 = ex; 3060 ex2 = ex;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 5313ae4cda2d..ee92b66d4558 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -70,7 +70,8 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
70 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 70 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
71 size_t length = iov_length(iov, nr_segs); 71 size_t length = iov_length(iov, nr_segs);
72 72
73 if (pos > sbi->s_bitmap_maxbytes) 73 if ((pos > sbi->s_bitmap_maxbytes ||
74 (pos == sbi->s_bitmap_maxbytes && length > 0)))
74 return -EFBIG; 75 return -EFBIG;
75 76
76 if (pos + length > sbi->s_bitmap_maxbytes) { 77 if (pos + length > sbi->s_bitmap_maxbytes) {
@@ -123,7 +124,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
123 if (!IS_ERR(cp)) { 124 if (!IS_ERR(cp)) {
124 memcpy(sbi->s_es->s_last_mounted, cp, 125 memcpy(sbi->s_es->s_last_mounted, cp,
125 sizeof(sbi->s_es->s_last_mounted)); 126 sizeof(sbi->s_es->s_last_mounted));
126 sb->s_dirt = 1; 127 ext4_mark_super_dirty(sb);
127 } 128 }
128 } 129 }
129 return dquot_file_open(inode, filp); 130 return dquot_file_open(inode, filp);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 25c4b3173fd9..45853e0d1f21 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -222,7 +222,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
222 is_directory = S_ISDIR(inode->i_mode); 222 is_directory = S_ISDIR(inode->i_mode);
223 223
224 /* Do this BEFORE marking the inode not in use or returning an error */ 224 /* Do this BEFORE marking the inode not in use or returning an error */
225 clear_inode(inode); 225 ext4_clear_inode(inode);
226 226
227 es = EXT4_SB(sb)->s_es; 227 es = EXT4_SB(sb)->s_es;
228 if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { 228 if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
@@ -279,7 +279,7 @@ out:
279 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); 279 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
280 if (!fatal) 280 if (!fatal)
281 fatal = err; 281 fatal = err;
282 sb->s_dirt = 1; 282 ext4_mark_super_dirty(sb);
283 } else 283 } else
284 ext4_error(sb, "bit already cleared for inode %lu", ino); 284 ext4_error(sb, "bit already cleared for inode %lu", ino);
285 285
@@ -965,7 +965,7 @@ got:
965 percpu_counter_dec(&sbi->s_freeinodes_counter); 965 percpu_counter_dec(&sbi->s_freeinodes_counter);
966 if (S_ISDIR(mode)) 966 if (S_ISDIR(mode))
967 percpu_counter_inc(&sbi->s_dirs_counter); 967 percpu_counter_inc(&sbi->s_dirs_counter);
968 sb->s_dirt = 1; 968 ext4_mark_super_dirty(sb);
969 969
970 if (sbi->s_log_groups_per_flex) { 970 if (sbi->s_log_groups_per_flex) {
971 flex_group = ext4_flex_group(sbi, group); 971 flex_group = ext4_flex_group(sbi, group);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 42272d67955a..4b8debeb3965 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -167,11 +167,16 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
167/* 167/*
168 * Called at the last iput() if i_nlink is zero. 168 * Called at the last iput() if i_nlink is zero.
169 */ 169 */
170void ext4_delete_inode(struct inode *inode) 170void ext4_evict_inode(struct inode *inode)
171{ 171{
172 handle_t *handle; 172 handle_t *handle;
173 int err; 173 int err;
174 174
175 if (inode->i_nlink) {
176 truncate_inode_pages(&inode->i_data, 0);
177 goto no_delete;
178 }
179
175 if (!is_bad_inode(inode)) 180 if (!is_bad_inode(inode))
176 dquot_initialize(inode); 181 dquot_initialize(inode);
177 182
@@ -221,6 +226,7 @@ void ext4_delete_inode(struct inode *inode)
221 "couldn't extend journal (err %d)", err); 226 "couldn't extend journal (err %d)", err);
222 stop_handle: 227 stop_handle:
223 ext4_journal_stop(handle); 228 ext4_journal_stop(handle);
229 ext4_orphan_del(NULL, inode);
224 goto no_delete; 230 goto no_delete;
225 } 231 }
226 } 232 }
@@ -245,13 +251,13 @@ void ext4_delete_inode(struct inode *inode)
245 */ 251 */
246 if (ext4_mark_inode_dirty(handle, inode)) 252 if (ext4_mark_inode_dirty(handle, inode))
247 /* If that failed, just do the required in-core inode clear. */ 253 /* If that failed, just do the required in-core inode clear. */
248 clear_inode(inode); 254 ext4_clear_inode(inode);
249 else 255 else
250 ext4_free_inode(handle, inode); 256 ext4_free_inode(handle, inode);
251 ext4_journal_stop(handle); 257 ext4_journal_stop(handle);
252 return; 258 return;
253no_delete: 259no_delete:
254 clear_inode(inode); /* We must guarantee clearing of inode... */ 260 ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
255} 261}
256 262
257typedef struct { 263typedef struct {
@@ -337,9 +343,11 @@ static int ext4_block_to_path(struct inode *inode,
337 return n; 343 return n;
338} 344}
339 345
340static int __ext4_check_blockref(const char *function, struct inode *inode, 346static int __ext4_check_blockref(const char *function, unsigned int line,
347 struct inode *inode,
341 __le32 *p, unsigned int max) 348 __le32 *p, unsigned int max)
342{ 349{
350 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
343 __le32 *bref = p; 351 __le32 *bref = p;
344 unsigned int blk; 352 unsigned int blk;
345 353
@@ -348,8 +356,9 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
348 if (blk && 356 if (blk &&
349 unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), 357 unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
350 blk, 1))) { 358 blk, 1))) {
351 ext4_error_inode(function, inode, 359 es->s_last_error_block = cpu_to_le64(blk);
352 "invalid block reference %u", blk); 360 ext4_error_inode(inode, function, line, blk,
361 "invalid block");
353 return -EIO; 362 return -EIO;
354 } 363 }
355 } 364 }
@@ -358,11 +367,13 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
358 367
359 368
360#define ext4_check_indirect_blockref(inode, bh) \ 369#define ext4_check_indirect_blockref(inode, bh) \
361 __ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data, \ 370 __ext4_check_blockref(__func__, __LINE__, inode, \
371 (__le32 *)(bh)->b_data, \
362 EXT4_ADDR_PER_BLOCK((inode)->i_sb)) 372 EXT4_ADDR_PER_BLOCK((inode)->i_sb))
363 373
364#define ext4_check_inode_blockref(inode) \ 374#define ext4_check_inode_blockref(inode) \
365 __ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data, \ 375 __ext4_check_blockref(__func__, __LINE__, inode, \
376 EXT4_I(inode)->i_data, \
366 EXT4_NDIR_BLOCKS) 377 EXT4_NDIR_BLOCKS)
367 378
368/** 379/**
@@ -1128,20 +1139,24 @@ void ext4_da_update_reserve_space(struct inode *inode,
1128 ext4_discard_preallocations(inode); 1139 ext4_discard_preallocations(inode);
1129} 1140}
1130 1141
1131static int check_block_validity(struct inode *inode, const char *func, 1142static int __check_block_validity(struct inode *inode, const char *func,
1143 unsigned int line,
1132 struct ext4_map_blocks *map) 1144 struct ext4_map_blocks *map)
1133{ 1145{
1134 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, 1146 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk,
1135 map->m_len)) { 1147 map->m_len)) {
1136 ext4_error_inode(func, inode, 1148 ext4_error_inode(inode, func, line, map->m_pblk,
1137 "lblock %lu mapped to illegal pblock %llu " 1149 "lblock %lu mapped to illegal pblock "
1138 "(length %d)", (unsigned long) map->m_lblk, 1150 "(length %d)", (unsigned long) map->m_lblk,
1139 map->m_pblk, map->m_len); 1151 map->m_len);
1140 return -EIO; 1152 return -EIO;
1141 } 1153 }
1142 return 0; 1154 return 0;
1143} 1155}
1144 1156
1157#define check_block_validity(inode, map) \
1158 __check_block_validity((inode), __func__, __LINE__, (map))
1159
1145/* 1160/*
1146 * Return the number of contiguous dirty pages in a given inode 1161 * Return the number of contiguous dirty pages in a given inode
1147 * starting at page frame idx. 1162 * starting at page frame idx.
@@ -1244,7 +1259,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
1244 up_read((&EXT4_I(inode)->i_data_sem)); 1259 up_read((&EXT4_I(inode)->i_data_sem));
1245 1260
1246 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 1261 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
1247 int ret = check_block_validity(inode, __func__, map); 1262 int ret = check_block_validity(inode, map);
1248 if (ret != 0) 1263 if (ret != 0)
1249 return ret; 1264 return ret;
1250 } 1265 }
@@ -1324,9 +1339,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
1324 1339
1325 up_write((&EXT4_I(inode)->i_data_sem)); 1340 up_write((&EXT4_I(inode)->i_data_sem));
1326 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 1341 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
1327 int ret = check_block_validity(inode, 1342 int ret = check_block_validity(inode, map);
1328 "ext4_map_blocks_after_alloc",
1329 map);
1330 if (ret != 0) 1343 if (ret != 0)
1331 return ret; 1344 return ret;
1332 } 1345 }
@@ -1519,9 +1532,25 @@ static int walk_page_buffers(handle_t *handle,
1519static int do_journal_get_write_access(handle_t *handle, 1532static int do_journal_get_write_access(handle_t *handle,
1520 struct buffer_head *bh) 1533 struct buffer_head *bh)
1521{ 1534{
1535 int dirty = buffer_dirty(bh);
1536 int ret;
1537
1522 if (!buffer_mapped(bh) || buffer_freed(bh)) 1538 if (!buffer_mapped(bh) || buffer_freed(bh))
1523 return 0; 1539 return 0;
1524 return ext4_journal_get_write_access(handle, bh); 1540 /*
1541 * __block_prepare_write() could have dirtied some buffers. Clean
1542 * the dirty bit as jbd2_journal_get_write_access() could complain
1543 * otherwise about fs integrity issues. Setting of the dirty bit
1544 * by __block_prepare_write() isn't a real problem here as we clear
1545 * the bit before releasing a page lock and thus writeback cannot
1546 * ever write the buffer.
1547 */
1548 if (dirty)
1549 clear_buffer_dirty(bh);
1550 ret = ext4_journal_get_write_access(handle, bh);
1551 if (!ret && dirty)
1552 ret = ext4_handle_dirty_metadata(handle, NULL, bh);
1553 return ret;
1525} 1554}
1526 1555
1527/* 1556/*
@@ -1578,11 +1607,9 @@ retry:
1578 *pagep = page; 1607 *pagep = page;
1579 1608
1580 if (ext4_should_dioread_nolock(inode)) 1609 if (ext4_should_dioread_nolock(inode))
1581 ret = block_write_begin(file, mapping, pos, len, flags, pagep, 1610 ret = __block_write_begin(page, pos, len, ext4_get_block_write);
1582 fsdata, ext4_get_block_write);
1583 else 1611 else
1584 ret = block_write_begin(file, mapping, pos, len, flags, pagep, 1612 ret = __block_write_begin(page, pos, len, ext4_get_block);
1585 fsdata, ext4_get_block);
1586 1613
1587 if (!ret && ext4_should_journal_data(inode)) { 1614 if (!ret && ext4_should_journal_data(inode)) {
1588 ret = walk_page_buffers(handle, page_buffers(page), 1615 ret = walk_page_buffers(handle, page_buffers(page),
@@ -1593,7 +1620,7 @@ retry:
1593 unlock_page(page); 1620 unlock_page(page);
1594 page_cache_release(page); 1621 page_cache_release(page);
1595 /* 1622 /*
1596 * block_write_begin may have instantiated a few blocks 1623 * __block_write_begin may have instantiated a few blocks
1597 * outside i_size. Trim these off again. Don't need 1624 * outside i_size. Trim these off again. Don't need
1598 * i_size_read because we hold i_mutex. 1625 * i_size_read because we hold i_mutex.
1599 * 1626 *
@@ -2194,7 +2221,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2194 BUG_ON(!handle); 2221 BUG_ON(!handle);
2195 2222
2196 /* 2223 /*
2197 * Call ext4_get_blocks() to allocate any delayed allocation 2224 * Call ext4_map_blocks() to allocate any delayed allocation
2198 * blocks, or to convert an uninitialized extent to be 2225 * blocks, or to convert an uninitialized extent to be
2199 * initialized (in the case where we have written into 2226 * initialized (in the case where we have written into
2200 * one or more preallocated blocks). 2227 * one or more preallocated blocks).
@@ -2203,7 +2230,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2203 * indicate that we are on the delayed allocation path. This 2230 * indicate that we are on the delayed allocation path. This
2204 * affects functions in many different parts of the allocation 2231 * affects functions in many different parts of the allocation
2205 * call path. This flag exists primarily because we don't 2232 * call path. This flag exists primarily because we don't
2206 * want to change *many* call functions, so ext4_get_blocks() 2233 * want to change *many* call functions, so ext4_map_blocks()
2207 * will set the magic i_delalloc_reserved_flag once the 2234 * will set the magic i_delalloc_reserved_flag once the
2208 * inode's allocation semaphore is taken. 2235 * inode's allocation semaphore is taken.
2209 * 2236 *
@@ -2221,6 +2248,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2221 2248
2222 blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags); 2249 blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags);
2223 if (blks < 0) { 2250 if (blks < 0) {
2251 struct super_block *sb = mpd->inode->i_sb;
2252
2224 err = blks; 2253 err = blks;
2225 /* 2254 /*
2226 * If get block returns with error we simply 2255 * If get block returns with error we simply
@@ -2231,7 +2260,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2231 return 0; 2260 return 0;
2232 2261
2233 if (err == -ENOSPC && 2262 if (err == -ENOSPC &&
2234 ext4_count_free_blocks(mpd->inode->i_sb)) { 2263 ext4_count_free_blocks(sb)) {
2235 mpd->retval = err; 2264 mpd->retval = err;
2236 return 0; 2265 return 0;
2237 } 2266 }
@@ -2243,16 +2272,17 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2243 * writepage and writepages will again try to write 2272 * writepage and writepages will again try to write
2244 * the same. 2273 * the same.
2245 */ 2274 */
2246 ext4_msg(mpd->inode->i_sb, KERN_CRIT, 2275 if (!(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) {
2247 "delayed block allocation failed for inode %lu at " 2276 ext4_msg(sb, KERN_CRIT,
2248 "logical offset %llu with max blocks %zd with " 2277 "delayed block allocation failed for inode %lu "
2249 "error %d", mpd->inode->i_ino, 2278 "at logical offset %llu with max blocks %zd "
2250 (unsigned long long) next, 2279 "with error %d", mpd->inode->i_ino,
2251 mpd->b_size >> mpd->inode->i_blkbits, err); 2280 (unsigned long long) next,
2252 printk(KERN_CRIT "This should not happen!! " 2281 mpd->b_size >> mpd->inode->i_blkbits, err);
2253 "Data will be lost\n"); 2282 ext4_msg(sb, KERN_CRIT,
2254 if (err == -ENOSPC) { 2283 "This should not happen!! Data will be lost\n");
2255 ext4_print_free_blocks(mpd->inode); 2284 if (err == -ENOSPC)
2285 ext4_print_free_blocks(mpd->inode);
2256 } 2286 }
2257 /* invalidate all the pages */ 2287 /* invalidate all the pages */
2258 ext4_da_block_invalidatepages(mpd, next, 2288 ext4_da_block_invalidatepages(mpd, next,
@@ -2320,7 +2350,7 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
2320 * XXX Don't go larger than mballoc is willing to allocate 2350 * XXX Don't go larger than mballoc is willing to allocate
2321 * This is a stopgap solution. We eventually need to fold 2351 * This is a stopgap solution. We eventually need to fold
2322 * mpage_da_submit_io() into this function and then call 2352 * mpage_da_submit_io() into this function and then call
2323 * ext4_get_blocks() multiple times in a loop 2353 * ext4_map_blocks() multiple times in a loop
2324 */ 2354 */
2325 if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize) 2355 if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize)
2326 goto flush_it; 2356 goto flush_it;
@@ -2553,18 +2583,16 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2553/* 2583/*
2554 * This function is used as a standard get_block_t calback function 2584 * This function is used as a standard get_block_t calback function
2555 * when there is no desire to allocate any blocks. It is used as a 2585 * when there is no desire to allocate any blocks. It is used as a
2556 * callback function for block_prepare_write(), nobh_writepage(), and 2586 * callback function for block_prepare_write() and block_write_full_page().
2557 * block_write_full_page(). These functions should only try to map a 2587 * These functions should only try to map a single block at a time.
2558 * single block at a time.
2559 * 2588 *
2560 * Since this function doesn't do block allocations even if the caller 2589 * Since this function doesn't do block allocations even if the caller
2561 * requests it by passing in create=1, it is critically important that 2590 * requests it by passing in create=1, it is critically important that
2562 * any caller checks to make sure that any buffer heads are returned 2591 * any caller checks to make sure that any buffer heads are returned
2563 * by this function are either all already mapped or marked for 2592 * by this function are either all already mapped or marked for
2564 * delayed allocation before calling nobh_writepage() or 2593 * delayed allocation before calling block_write_full_page(). Otherwise,
2565 * block_write_full_page(). Otherwise, b_blocknr could be left 2594 * b_blocknr could be left unitialized, and the page write functions will
2566 * unitialized, and the page write functions will be taken by 2595 * be taken by surprise.
2567 * surprise.
2568 */ 2596 */
2569static int noalloc_get_block_write(struct inode *inode, sector_t iblock, 2597static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
2570 struct buffer_head *bh_result, int create) 2598 struct buffer_head *bh_result, int create)
@@ -2749,9 +2777,7 @@ static int ext4_writepage(struct page *page,
2749 return __ext4_journalled_writepage(page, len); 2777 return __ext4_journalled_writepage(page, len);
2750 } 2778 }
2751 2779
2752 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) 2780 if (page_bufs && buffer_uninit(page_bufs)) {
2753 ret = nobh_writepage(page, noalloc_get_block_write, wbc);
2754 else if (page_bufs && buffer_uninit(page_bufs)) {
2755 ext4_set_bh_endio(page_bufs, inode); 2781 ext4_set_bh_endio(page_bufs, inode);
2756 ret = block_write_full_page_endio(page, noalloc_get_block_write, 2782 ret = block_write_full_page_endio(page, noalloc_get_block_write,
2757 wbc, ext4_end_io_buffer_write); 2783 wbc, ext4_end_io_buffer_write);
@@ -3146,13 +3172,10 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
3146 int ret, retries = 0; 3172 int ret, retries = 0;
3147 struct page *page; 3173 struct page *page;
3148 pgoff_t index; 3174 pgoff_t index;
3149 unsigned from, to;
3150 struct inode *inode = mapping->host; 3175 struct inode *inode = mapping->host;
3151 handle_t *handle; 3176 handle_t *handle;
3152 3177
3153 index = pos >> PAGE_CACHE_SHIFT; 3178 index = pos >> PAGE_CACHE_SHIFT;
3154 from = pos & (PAGE_CACHE_SIZE - 1);
3155 to = from + len;
3156 3179
3157 if (ext4_nonda_switch(inode->i_sb)) { 3180 if (ext4_nonda_switch(inode->i_sb)) {
3158 *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; 3181 *fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
@@ -3185,8 +3208,7 @@ retry:
3185 } 3208 }
3186 *pagep = page; 3209 *pagep = page;
3187 3210
3188 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 3211 ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
3189 ext4_da_get_block_prep);
3190 if (ret < 0) { 3212 if (ret < 0) {
3191 unlock_page(page); 3213 unlock_page(page);
3192 ext4_journal_stop(handle); 3214 ext4_journal_stop(handle);
@@ -3545,15 +3567,24 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
3545 3567
3546retry: 3568retry:
3547 if (rw == READ && ext4_should_dioread_nolock(inode)) 3569 if (rw == READ && ext4_should_dioread_nolock(inode))
3548 ret = blockdev_direct_IO_no_locking(rw, iocb, inode, 3570 ret = __blockdev_direct_IO(rw, iocb, inode,
3549 inode->i_sb->s_bdev, iov, 3571 inode->i_sb->s_bdev, iov,
3550 offset, nr_segs, 3572 offset, nr_segs,
3551 ext4_get_block, NULL); 3573 ext4_get_block, NULL, NULL, 0);
3552 else 3574 else {
3553 ret = blockdev_direct_IO(rw, iocb, inode, 3575 ret = blockdev_direct_IO(rw, iocb, inode,
3554 inode->i_sb->s_bdev, iov, 3576 inode->i_sb->s_bdev, iov,
3555 offset, nr_segs, 3577 offset, nr_segs,
3556 ext4_get_block, NULL); 3578 ext4_get_block, NULL);
3579
3580 if (unlikely((rw & WRITE) && ret < 0)) {
3581 loff_t isize = i_size_read(inode);
3582 loff_t end = offset + iov_length(iov, nr_segs);
3583
3584 if (end > isize)
3585 vmtruncate(inode, isize);
3586 }
3587 }
3557 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 3588 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
3558 goto retry; 3589 goto retry;
3559 3590
@@ -3668,6 +3699,8 @@ static int ext4_end_io_nolock(ext4_io_end_t *io)
3668 return ret; 3699 return ret;
3669 } 3700 }
3670 3701
3702 if (io->iocb)
3703 aio_complete(io->iocb, io->result, 0);
3671 /* clear the DIO AIO unwritten flag */ 3704 /* clear the DIO AIO unwritten flag */
3672 io->flag = 0; 3705 io->flag = 0;
3673 return ret; 3706 return ret;
@@ -3767,6 +3800,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
3767 io->offset = 0; 3800 io->offset = 0;
3768 io->size = 0; 3801 io->size = 0;
3769 io->page = NULL; 3802 io->page = NULL;
3803 io->iocb = NULL;
3804 io->result = 0;
3770 INIT_WORK(&io->work, ext4_end_io_work); 3805 INIT_WORK(&io->work, ext4_end_io_work);
3771 INIT_LIST_HEAD(&io->list); 3806 INIT_LIST_HEAD(&io->list);
3772 } 3807 }
@@ -3775,7 +3810,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
3775} 3810}
3776 3811
3777static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, 3812static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3778 ssize_t size, void *private) 3813 ssize_t size, void *private, int ret,
3814 bool is_async)
3779{ 3815{
3780 ext4_io_end_t *io_end = iocb->private; 3816 ext4_io_end_t *io_end = iocb->private;
3781 struct workqueue_struct *wq; 3817 struct workqueue_struct *wq;
@@ -3784,7 +3820,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3784 3820
3785 /* if not async direct IO or dio with 0 bytes write, just return */ 3821 /* if not async direct IO or dio with 0 bytes write, just return */
3786 if (!io_end || !size) 3822 if (!io_end || !size)
3787 return; 3823 goto out;
3788 3824
3789 ext_debug("ext4_end_io_dio(): io_end 0x%p" 3825 ext_debug("ext4_end_io_dio(): io_end 0x%p"
3790 "for inode %lu, iocb 0x%p, offset %llu, size %llu\n", 3826 "for inode %lu, iocb 0x%p, offset %llu, size %llu\n",
@@ -3795,12 +3831,18 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3795 if (io_end->flag != EXT4_IO_UNWRITTEN){ 3831 if (io_end->flag != EXT4_IO_UNWRITTEN){
3796 ext4_free_io_end(io_end); 3832 ext4_free_io_end(io_end);
3797 iocb->private = NULL; 3833 iocb->private = NULL;
3834out:
3835 if (is_async)
3836 aio_complete(iocb, ret, 0);
3798 return; 3837 return;
3799 } 3838 }
3800 3839
3801 io_end->offset = offset; 3840 io_end->offset = offset;
3802 io_end->size = size; 3841 io_end->size = size;
3803 io_end->flag = EXT4_IO_UNWRITTEN; 3842 if (is_async) {
3843 io_end->iocb = iocb;
3844 io_end->result = ret;
3845 }
3804 wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; 3846 wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
3805 3847
3806 /* queue the work to convert unwritten extents to written */ 3848 /* queue the work to convert unwritten extents to written */
@@ -3937,7 +3979,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3937 return -ENOMEM; 3979 return -ENOMEM;
3938 /* 3980 /*
3939 * we save the io structure for current async 3981 * we save the io structure for current async
3940 * direct IO, so that later ext4_get_blocks() 3982 * direct IO, so that later ext4_map_blocks()
3941 * could flag the io structure whether there 3983 * could flag the io structure whether there
3942 * is a unwritten extents needs to be converted 3984 * is a unwritten extents needs to be converted
3943 * when IO is completed. 3985 * when IO is completed.
@@ -4128,17 +4170,6 @@ int ext4_block_truncate_page(handle_t *handle,
4128 length = blocksize - (offset & (blocksize - 1)); 4170 length = blocksize - (offset & (blocksize - 1));
4129 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); 4171 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
4130 4172
4131 /*
4132 * For "nobh" option, we can only work if we don't need to
4133 * read-in the page - otherwise we create buffers to do the IO.
4134 */
4135 if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) &&
4136 ext4_should_writeback_data(inode) && PageUptodate(page)) {
4137 zero_user(page, offset, length);
4138 set_page_dirty(page);
4139 goto unlock;
4140 }
4141
4142 if (!page_has_buffers(page)) 4173 if (!page_has_buffers(page))
4143 create_empty_buffers(page, blocksize, 0); 4174 create_empty_buffers(page, blocksize, 0);
4144 4175
@@ -4488,9 +4519,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
4488 * (should be rare). 4519 * (should be rare).
4489 */ 4520 */
4490 if (!bh) { 4521 if (!bh) {
4491 EXT4_ERROR_INODE(inode, 4522 EXT4_ERROR_INODE_BLOCK(inode, nr,
4492 "Read failure block=%llu", 4523 "Read failure");
4493 (unsigned long long) nr);
4494 continue; 4524 continue;
4495 } 4525 }
4496 4526
@@ -4502,27 +4532,6 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
4502 depth); 4532 depth);
4503 4533
4504 /* 4534 /*
4505 * We've probably journalled the indirect block several
4506 * times during the truncate. But it's no longer
4507 * needed and we now drop it from the transaction via
4508 * jbd2_journal_revoke().
4509 *
4510 * That's easy if it's exclusively part of this
4511 * transaction. But if it's part of the committing
4512 * transaction then jbd2_journal_forget() will simply
4513 * brelse() it. That means that if the underlying
4514 * block is reallocated in ext4_get_block(),
4515 * unmap_underlying_metadata() will find this block
4516 * and will try to get rid of it. damn, damn.
4517 *
4518 * If this block has already been committed to the
4519 * journal, a revoke record will be written. And
4520 * revoke records must be emitted *before* clearing
4521 * this block's bit in the bitmaps.
4522 */
4523 ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
4524
4525 /*
4526 * Everything below this this pointer has been 4535 * Everything below this this pointer has been
4527 * released. Now let this top-of-subtree go. 4536 * released. Now let this top-of-subtree go.
4528 * 4537 *
@@ -4546,8 +4555,20 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
4546 blocks_for_truncate(inode)); 4555 blocks_for_truncate(inode));
4547 } 4556 }
4548 4557
4558 /*
4559 * The forget flag here is critical because if
4560 * we are journaling (and not doing data
4561 * journaling), we have to make sure a revoke
4562 * record is written to prevent the journal
4563 * replay from overwriting the (former)
4564 * indirect block if it gets reallocated as a
4565 * data block. This must happen in the same
4566 * transaction where the data blocks are
4567 * actually freed.
4568 */
4549 ext4_free_blocks(handle, inode, 0, nr, 1, 4569 ext4_free_blocks(handle, inode, 0, nr, 1,
4550 EXT4_FREE_BLOCKS_METADATA); 4570 EXT4_FREE_BLOCKS_METADATA|
4571 EXT4_FREE_BLOCKS_FORGET);
4551 4572
4552 if (parent_bh) { 4573 if (parent_bh) {
4553 /* 4574 /*
@@ -4805,8 +4826,8 @@ static int __ext4_get_inode_loc(struct inode *inode,
4805 4826
4806 bh = sb_getblk(sb, block); 4827 bh = sb_getblk(sb, block);
4807 if (!bh) { 4828 if (!bh) {
4808 EXT4_ERROR_INODE(inode, "unable to read inode block - " 4829 EXT4_ERROR_INODE_BLOCK(inode, block,
4809 "block %llu", block); 4830 "unable to read itable block");
4810 return -EIO; 4831 return -EIO;
4811 } 4832 }
4812 if (!buffer_uptodate(bh)) { 4833 if (!buffer_uptodate(bh)) {
@@ -4904,8 +4925,8 @@ make_io:
4904 submit_bh(READ_META, bh); 4925 submit_bh(READ_META, bh);
4905 wait_on_buffer(bh); 4926 wait_on_buffer(bh);
4906 if (!buffer_uptodate(bh)) { 4927 if (!buffer_uptodate(bh)) {
4907 EXT4_ERROR_INODE(inode, "unable to read inode " 4928 EXT4_ERROR_INODE_BLOCK(inode, block,
4908 "block %llu", block); 4929 "unable to read itable block");
4909 brelse(bh); 4930 brelse(bh);
4910 return -EIO; 4931 return -EIO;
4911 } 4932 }
@@ -4976,7 +4997,7 @@ static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
4976 /* we are using combined 48 bit field */ 4997 /* we are using combined 48 bit field */
4977 i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 | 4998 i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 |
4978 le32_to_cpu(raw_inode->i_blocks_lo); 4999 le32_to_cpu(raw_inode->i_blocks_lo);
4979 if (ei->i_flags & EXT4_HUGE_FILE_FL) { 5000 if (ext4_test_inode_flag(inode, EXT4_INODE_HUGE_FILE)) {
4980 /* i_blocks represent file system block size */ 5001 /* i_blocks represent file system block size */
4981 return i_blocks << (inode->i_blkbits - 9); 5002 return i_blocks << (inode->i_blkbits - 9);
4982 } else { 5003 } else {
@@ -5072,7 +5093,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
5072 transaction_t *transaction; 5093 transaction_t *transaction;
5073 tid_t tid; 5094 tid_t tid;
5074 5095
5075 spin_lock(&journal->j_state_lock); 5096 read_lock(&journal->j_state_lock);
5076 if (journal->j_running_transaction) 5097 if (journal->j_running_transaction)
5077 transaction = journal->j_running_transaction; 5098 transaction = journal->j_running_transaction;
5078 else 5099 else
@@ -5081,7 +5102,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
5081 tid = transaction->t_tid; 5102 tid = transaction->t_tid;
5082 else 5103 else
5083 tid = journal->j_commit_sequence; 5104 tid = journal->j_commit_sequence;
5084 spin_unlock(&journal->j_state_lock); 5105 read_unlock(&journal->j_state_lock);
5085 ei->i_sync_tid = tid; 5106 ei->i_sync_tid = tid;
5086 ei->i_datasync_tid = tid; 5107 ei->i_datasync_tid = tid;
5087 } 5108 }
@@ -5126,7 +5147,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
5126 ei->i_file_acl); 5147 ei->i_file_acl);
5127 ret = -EIO; 5148 ret = -EIO;
5128 goto bad_inode; 5149 goto bad_inode;
5129 } else if (ei->i_flags & EXT4_EXTENTS_FL) { 5150 } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
5130 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 5151 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
5131 (S_ISLNK(inode->i_mode) && 5152 (S_ISLNK(inode->i_mode) &&
5132 !ext4_inode_is_fast_symlink(inode))) 5153 !ext4_inode_is_fast_symlink(inode)))
@@ -5406,9 +5427,8 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
5406 if (wbc->sync_mode == WB_SYNC_ALL) 5427 if (wbc->sync_mode == WB_SYNC_ALL)
5407 sync_dirty_buffer(iloc.bh); 5428 sync_dirty_buffer(iloc.bh);
5408 if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { 5429 if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
5409 EXT4_ERROR_INODE(inode, 5430 EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr,
5410 "IO error syncing inode (block=%llu)", 5431 "IO error syncing inode");
5411 (unsigned long long) iloc.bh->b_blocknr);
5412 err = -EIO; 5432 err = -EIO;
5413 } 5433 }
5414 brelse(iloc.bh); 5434 brelse(iloc.bh);
@@ -5483,10 +5503,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5483 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { 5503 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
5484 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 5504 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
5485 5505
5486 if (attr->ia_size > sbi->s_bitmap_maxbytes) { 5506 if (attr->ia_size > sbi->s_bitmap_maxbytes)
5487 error = -EFBIG; 5507 return -EFBIG;
5488 goto err_out;
5489 }
5490 } 5508 }
5491 } 5509 }
5492 5510
@@ -5529,11 +5547,19 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5529 ext4_truncate(inode); 5547 ext4_truncate(inode);
5530 } 5548 }
5531 5549
5532 rc = inode_setattr(inode, attr); 5550 if ((attr->ia_valid & ATTR_SIZE) &&
5551 attr->ia_size != i_size_read(inode))
5552 rc = vmtruncate(inode, attr->ia_size);
5553
5554 if (!rc) {
5555 setattr_copy(inode, attr);
5556 mark_inode_dirty(inode);
5557 }
5533 5558
5534 /* If inode_setattr's call to ext4_truncate failed to get a 5559 /*
5535 * transaction handle at all, we need to clean up the in-core 5560 * If the call to ext4_truncate failed to get a transaction handle at
5536 * orphan list manually. */ 5561 * all, we need to clean up the in-core orphan list manually.
5562 */
5537 if (inode->i_nlink) 5563 if (inode->i_nlink)
5538 ext4_orphan_del(NULL, inode); 5564 ext4_orphan_del(NULL, inode);
5539 5565
@@ -5688,7 +5714,7 @@ int ext4_writepage_trans_blocks(struct inode *inode)
5688 * Calculate the journal credits for a chunk of data modification. 5714 * Calculate the journal credits for a chunk of data modification.
5689 * 5715 *
5690 * This is called from DIO, fallocate or whoever calling 5716 * This is called from DIO, fallocate or whoever calling
5691 * ext4_get_blocks() to map/allocate a chunk of contiguous disk blocks. 5717 * ext4_map_blocks() to map/allocate a chunk of contiguous disk blocks.
5692 * 5718 *
5693 * journal buffers for data blocks are not included here, as DIO 5719 * journal buffers for data blocks are not included here, as DIO
5694 * and fallocate do not need to journal data buffers. 5720 * and fallocate do not need to journal data buffers.
@@ -5754,7 +5780,6 @@ static int ext4_expand_extra_isize(struct inode *inode,
5754{ 5780{
5755 struct ext4_inode *raw_inode; 5781 struct ext4_inode *raw_inode;
5756 struct ext4_xattr_ibody_header *header; 5782 struct ext4_xattr_ibody_header *header;
5757 struct ext4_xattr_entry *entry;
5758 5783
5759 if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) 5784 if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
5760 return 0; 5785 return 0;
@@ -5762,7 +5787,6 @@ static int ext4_expand_extra_isize(struct inode *inode,
5762 raw_inode = ext4_raw_inode(&iloc); 5787 raw_inode = ext4_raw_inode(&iloc);
5763 5788
5764 header = IHDR(inode, raw_inode); 5789 header = IHDR(inode, raw_inode);
5765 entry = IFIRST(header);
5766 5790
5767 /* No extended attributes present */ 5791 /* No extended attributes present */
5768 if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || 5792 if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 12b3bc026a68..4b4ad4b7ce57 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -446,10 +446,11 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
446 blocknr = ext4_group_first_block_no(sb, e4b->bd_group); 446 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
447 blocknr += first + i; 447 blocknr += first + i;
448 ext4_grp_locked_error(sb, e4b->bd_group, 448 ext4_grp_locked_error(sb, e4b->bd_group,
449 __func__, "double-free of inode" 449 inode ? inode->i_ino : 0,
450 " %lu's block %llu(bit %u in group %u)", 450 blocknr,
451 inode ? inode->i_ino : 0, blocknr, 451 "freeing block already freed "
452 first + i, e4b->bd_group); 452 "(bit %u)",
453 first + i);
453 } 454 }
454 mb_clear_bit(first + i, e4b->bd_info->bb_bitmap); 455 mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
455 } 456 }
@@ -712,9 +713,9 @@ void ext4_mb_generate_buddy(struct super_block *sb,
712 grp->bb_fragments = fragments; 713 grp->bb_fragments = fragments;
713 714
714 if (free != grp->bb_free) { 715 if (free != grp->bb_free) {
715 ext4_grp_locked_error(sb, group, __func__, 716 ext4_grp_locked_error(sb, group, 0, 0,
716 "EXT4-fs: group %u: %u blocks in bitmap, %u in gd", 717 "%u blocks in bitmap, %u in gd",
717 group, free, grp->bb_free); 718 free, grp->bb_free);
718 /* 719 /*
719 * If we intend to continue, we consider group descriptor 720 * If we intend to continue, we consider group descriptor
720 * corrupt and update bb_free using bitmap value 721 * corrupt and update bb_free using bitmap value
@@ -1296,10 +1297,10 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1296 blocknr = ext4_group_first_block_no(sb, e4b->bd_group); 1297 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
1297 blocknr += block; 1298 blocknr += block;
1298 ext4_grp_locked_error(sb, e4b->bd_group, 1299 ext4_grp_locked_error(sb, e4b->bd_group,
1299 __func__, "double-free of inode" 1300 inode ? inode->i_ino : 0,
1300 " %lu's block %llu(bit %u in group %u)", 1301 blocknr,
1301 inode ? inode->i_ino : 0, blocknr, block, 1302 "freeing already freed block "
1302 e4b->bd_group); 1303 "(bit %u)", block);
1303 } 1304 }
1304 mb_clear_bit(block, EXT4_MB_BITMAP(e4b)); 1305 mb_clear_bit(block, EXT4_MB_BITMAP(e4b));
1305 e4b->bd_info->bb_counters[order]++; 1306 e4b->bd_info->bb_counters[order]++;
@@ -1788,8 +1789,8 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1788 * free blocks even though group info says we 1789 * free blocks even though group info says we
1789 * we have free blocks 1790 * we have free blocks
1790 */ 1791 */
1791 ext4_grp_locked_error(sb, e4b->bd_group, 1792 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
1792 __func__, "%d free blocks as per " 1793 "%d free blocks as per "
1793 "group info. But bitmap says 0", 1794 "group info. But bitmap says 0",
1794 free); 1795 free);
1795 break; 1796 break;
@@ -1798,8 +1799,8 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1798 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); 1799 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
1799 BUG_ON(ex.fe_len <= 0); 1800 BUG_ON(ex.fe_len <= 0);
1800 if (free < ex.fe_len) { 1801 if (free < ex.fe_len) {
1801 ext4_grp_locked_error(sb, e4b->bd_group, 1802 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
1802 __func__, "%d free blocks as per " 1803 "%d free blocks as per "
1803 "group info. But got %d blocks", 1804 "group info. But got %d blocks",
1804 free, ex.fe_len); 1805 free, ex.fe_len);
1805 /* 1806 /*
@@ -1821,8 +1822,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1821 1822
1822/* 1823/*
1823 * This is a special case for storages like raid5 1824 * This is a special case for storages like raid5
1824 * we try to find stripe-aligned chunks for stripe-size requests 1825 * we try to find stripe-aligned chunks for stripe-size-multiple requests
1825 * XXX should do so at least for multiples of stripe size as well
1826 */ 1826 */
1827static noinline_for_stack 1827static noinline_for_stack
1828void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, 1828void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
@@ -1999,7 +1999,6 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1999 ext4_group_t ngroups, group, i; 1999 ext4_group_t ngroups, group, i;
2000 int cr; 2000 int cr;
2001 int err = 0; 2001 int err = 0;
2002 int bsbits;
2003 struct ext4_sb_info *sbi; 2002 struct ext4_sb_info *sbi;
2004 struct super_block *sb; 2003 struct super_block *sb;
2005 struct ext4_buddy e4b; 2004 struct ext4_buddy e4b;
@@ -2041,8 +2040,6 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
2041 ac->ac_2order = i - 1; 2040 ac->ac_2order = i - 1;
2042 } 2041 }
2043 2042
2044 bsbits = ac->ac_sb->s_blocksize_bits;
2045
2046 /* if stream allocation is enabled, use global goal */ 2043 /* if stream allocation is enabled, use global goal */
2047 if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { 2044 if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
2048 /* TBD: may be hot point */ 2045 /* TBD: may be hot point */
@@ -2094,8 +2091,8 @@ repeat:
2094 ac->ac_groups_scanned++; 2091 ac->ac_groups_scanned++;
2095 if (cr == 0) 2092 if (cr == 0)
2096 ext4_mb_simple_scan_group(ac, &e4b); 2093 ext4_mb_simple_scan_group(ac, &e4b);
2097 else if (cr == 1 && 2094 else if (cr == 1 && sbi->s_stripe &&
2098 ac->ac_g_ex.fe_len == sbi->s_stripe) 2095 !(ac->ac_g_ex.fe_len % sbi->s_stripe))
2099 ext4_mb_scan_aligned(ac, &e4b); 2096 ext4_mb_scan_aligned(ac, &e4b);
2100 else 2097 else
2101 ext4_mb_complex_scan_group(ac, &e4b); 2098 ext4_mb_complex_scan_group(ac, &e4b);
@@ -2221,7 +2218,7 @@ static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
2221 2218
2222 rc = seq_open(file, &ext4_mb_seq_groups_ops); 2219 rc = seq_open(file, &ext4_mb_seq_groups_ops);
2223 if (rc == 0) { 2220 if (rc == 0) {
2224 struct seq_file *m = (struct seq_file *)file->private_data; 2221 struct seq_file *m = file->private_data;
2225 m->private = sb; 2222 m->private = sb;
2226 } 2223 }
2227 return rc; 2224 return rc;
@@ -2560,6 +2557,22 @@ int ext4_mb_release(struct super_block *sb)
2560 return 0; 2557 return 0;
2561} 2558}
2562 2559
2560static inline void ext4_issue_discard(struct super_block *sb,
2561 ext4_group_t block_group, ext4_grpblk_t block, int count)
2562{
2563 int ret;
2564 ext4_fsblk_t discard_block;
2565
2566 discard_block = block + ext4_group_first_block_no(sb, block_group);
2567 trace_ext4_discard_blocks(sb,
2568 (unsigned long long) discard_block, count);
2569 ret = sb_issue_discard(sb, discard_block, count);
2570 if (ret == EOPNOTSUPP) {
2571 ext4_warning(sb, "discard not supported, disabling");
2572 clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
2573 }
2574}
2575
2563/* 2576/*
2564 * This function is called by the jbd2 layer once the commit has finished, 2577 * This function is called by the jbd2 layer once the commit has finished,
2565 * so we know we can free the blocks that were released with that commit. 2578 * so we know we can free the blocks that were released with that commit.
@@ -2579,22 +2592,9 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2579 mb_debug(1, "gonna free %u blocks in group %u (0x%p):", 2592 mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
2580 entry->count, entry->group, entry); 2593 entry->count, entry->group, entry);
2581 2594
2582 if (test_opt(sb, DISCARD)) { 2595 if (test_opt(sb, DISCARD))
2583 int ret; 2596 ext4_issue_discard(sb, entry->group,
2584 ext4_fsblk_t discard_block; 2597 entry->start_blk, entry->count);
2585
2586 discard_block = entry->start_blk +
2587 ext4_group_first_block_no(sb, entry->group);
2588 trace_ext4_discard_blocks(sb,
2589 (unsigned long long)discard_block,
2590 entry->count);
2591 ret = sb_issue_discard(sb, discard_block, entry->count);
2592 if (ret == EOPNOTSUPP) {
2593 ext4_warning(sb,
2594 "discard not supported, disabling");
2595 clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
2596 }
2597 }
2598 2598
2599 err = ext4_mb_load_buddy(sb, entry->group, &e4b); 2599 err = ext4_mb_load_buddy(sb, entry->group, &e4b);
2600 /* we expect to find existing buddy because it's pinned */ 2600 /* we expect to find existing buddy because it's pinned */
@@ -2704,7 +2704,7 @@ void exit_ext4_mballoc(void)
2704 2704
2705 2705
2706/* 2706/*
2707 * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps 2707 * Check quota and mark chosen space (ac->ac_b_ex) non-free in bitmaps
2708 * Returns 0 if success or error code 2708 * Returns 0 if success or error code
2709 */ 2709 */
2710static noinline_for_stack int 2710static noinline_for_stack int
@@ -2712,7 +2712,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2712 handle_t *handle, unsigned int reserv_blks) 2712 handle_t *handle, unsigned int reserv_blks)
2713{ 2713{
2714 struct buffer_head *bitmap_bh = NULL; 2714 struct buffer_head *bitmap_bh = NULL;
2715 struct ext4_super_block *es;
2716 struct ext4_group_desc *gdp; 2715 struct ext4_group_desc *gdp;
2717 struct buffer_head *gdp_bh; 2716 struct buffer_head *gdp_bh;
2718 struct ext4_sb_info *sbi; 2717 struct ext4_sb_info *sbi;
@@ -2725,8 +2724,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2725 2724
2726 sb = ac->ac_sb; 2725 sb = ac->ac_sb;
2727 sbi = EXT4_SB(sb); 2726 sbi = EXT4_SB(sb);
2728 es = sbi->s_es;
2729
2730 2727
2731 err = -EIO; 2728 err = -EIO;
2732 bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group); 2729 bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
@@ -2812,7 +2809,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2812 err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh); 2809 err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
2813 2810
2814out_err: 2811out_err:
2815 sb->s_dirt = 1; 2812 ext4_mark_super_dirty(sb);
2816 brelse(bitmap_bh); 2813 brelse(bitmap_bh);
2817 return err; 2814 return err;
2818} 2815}
@@ -2850,7 +2847,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
2850 int bsbits, max; 2847 int bsbits, max;
2851 ext4_lblk_t end; 2848 ext4_lblk_t end;
2852 loff_t size, orig_size, start_off; 2849 loff_t size, orig_size, start_off;
2853 ext4_lblk_t start, orig_start; 2850 ext4_lblk_t start;
2854 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 2851 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
2855 struct ext4_prealloc_space *pa; 2852 struct ext4_prealloc_space *pa;
2856 2853
@@ -2881,6 +2878,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
2881 size = size << bsbits; 2878 size = size << bsbits;
2882 if (size < i_size_read(ac->ac_inode)) 2879 if (size < i_size_read(ac->ac_inode))
2883 size = i_size_read(ac->ac_inode); 2880 size = i_size_read(ac->ac_inode);
2881 orig_size = size;
2884 2882
2885 /* max size of free chunks */ 2883 /* max size of free chunks */
2886 max = 2 << bsbits; 2884 max = 2 << bsbits;
@@ -2922,8 +2920,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
2922 start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits; 2920 start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits;
2923 size = ac->ac_o_ex.fe_len << bsbits; 2921 size = ac->ac_o_ex.fe_len << bsbits;
2924 } 2922 }
2925 orig_size = size = size >> bsbits; 2923 size = size >> bsbits;
2926 orig_start = start = start_off >> bsbits; 2924 start = start_off >> bsbits;
2927 2925
2928 /* don't cover already allocated blocks in selected range */ 2926 /* don't cover already allocated blocks in selected range */
2929 if (ar->pleft && start <= ar->lleft) { 2927 if (ar->pleft && start <= ar->lleft) {
@@ -3547,7 +3545,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3547 ext4_group_t group; 3545 ext4_group_t group;
3548 ext4_grpblk_t bit; 3546 ext4_grpblk_t bit;
3549 unsigned long long grp_blk_start; 3547 unsigned long long grp_blk_start;
3550 sector_t start;
3551 int err = 0; 3548 int err = 0;
3552 int free = 0; 3549 int free = 0;
3553 3550
@@ -3567,10 +3564,9 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3567 if (bit >= end) 3564 if (bit >= end)
3568 break; 3565 break;
3569 next = mb_find_next_bit(bitmap_bh->b_data, end, bit); 3566 next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
3570 start = ext4_group_first_block_no(sb, group) + bit;
3571 mb_debug(1, " free preallocated %u/%u in group %u\n", 3567 mb_debug(1, " free preallocated %u/%u in group %u\n",
3572 (unsigned) start, (unsigned) next - bit, 3568 (unsigned) ext4_group_first_block_no(sb, group) + bit,
3573 (unsigned) group); 3569 (unsigned) next - bit, (unsigned) group);
3574 free += next - bit; 3570 free += next - bit;
3575 3571
3576 if (ac) { 3572 if (ac) {
@@ -3581,7 +3577,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3581 trace_ext4_mballoc_discard(ac); 3577 trace_ext4_mballoc_discard(ac);
3582 } 3578 }
3583 3579
3584 trace_ext4_mb_release_inode_pa(ac, pa, grp_blk_start + bit, 3580 trace_ext4_mb_release_inode_pa(sb, ac, pa, grp_blk_start + bit,
3585 next - bit); 3581 next - bit);
3586 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); 3582 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
3587 bit = next + 1; 3583 bit = next + 1;
@@ -3591,8 +3587,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3591 pa, (unsigned long) pa->pa_lstart, 3587 pa, (unsigned long) pa->pa_lstart,
3592 (unsigned long) pa->pa_pstart, 3588 (unsigned long) pa->pa_pstart,
3593 (unsigned long) pa->pa_len); 3589 (unsigned long) pa->pa_len);
3594 ext4_grp_locked_error(sb, group, 3590 ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
3595 __func__, "free %u, pa_free %u",
3596 free, pa->pa_free); 3591 free, pa->pa_free);
3597 /* 3592 /*
3598 * pa is already deleted so we use the value obtained 3593 * pa is already deleted so we use the value obtained
@@ -3613,7 +3608,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3613 ext4_group_t group; 3608 ext4_group_t group;
3614 ext4_grpblk_t bit; 3609 ext4_grpblk_t bit;
3615 3610
3616 trace_ext4_mb_release_group_pa(ac, pa); 3611 trace_ext4_mb_release_group_pa(sb, ac, pa);
3617 BUG_ON(pa->pa_deleted == 0); 3612 BUG_ON(pa->pa_deleted == 0);
3618 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); 3613 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3619 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3614 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
@@ -3889,6 +3884,9 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
3889 struct super_block *sb = ac->ac_sb; 3884 struct super_block *sb = ac->ac_sb;
3890 ext4_group_t ngroups, i; 3885 ext4_group_t ngroups, i;
3891 3886
3887 if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
3888 return;
3889
3892 printk(KERN_ERR "EXT4-fs: Can't allocate:" 3890 printk(KERN_ERR "EXT4-fs: Can't allocate:"
3893 " Allocation context details:\n"); 3891 " Allocation context details:\n");
3894 printk(KERN_ERR "EXT4-fs: status %d flags %d\n", 3892 printk(KERN_ERR "EXT4-fs: status %d flags %d\n",
@@ -4255,7 +4253,7 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
4255 * to usual allocation 4253 * to usual allocation
4256 */ 4254 */
4257ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, 4255ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4258 struct ext4_allocation_request *ar, int *errp) 4256 struct ext4_allocation_request *ar, int *errp)
4259{ 4257{
4260 int freed; 4258 int freed;
4261 struct ext4_allocation_context *ac = NULL; 4259 struct ext4_allocation_context *ac = NULL;
@@ -4299,7 +4297,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4299 inquota = ar->len; 4297 inquota = ar->len;
4300 if (ar->len == 0) { 4298 if (ar->len == 0) {
4301 *errp = -EDQUOT; 4299 *errp = -EDQUOT;
4302 goto out3; 4300 goto out;
4303 } 4301 }
4304 } 4302 }
4305 4303
@@ -4307,13 +4305,13 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4307 if (!ac) { 4305 if (!ac) {
4308 ar->len = 0; 4306 ar->len = 0;
4309 *errp = -ENOMEM; 4307 *errp = -ENOMEM;
4310 goto out1; 4308 goto out;
4311 } 4309 }
4312 4310
4313 *errp = ext4_mb_initialize_context(ac, ar); 4311 *errp = ext4_mb_initialize_context(ac, ar);
4314 if (*errp) { 4312 if (*errp) {
4315 ar->len = 0; 4313 ar->len = 0;
4316 goto out2; 4314 goto out;
4317 } 4315 }
4318 4316
4319 ac->ac_op = EXT4_MB_HISTORY_PREALLOC; 4317 ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
@@ -4322,7 +4320,9 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4322 ext4_mb_normalize_request(ac, ar); 4320 ext4_mb_normalize_request(ac, ar);
4323repeat: 4321repeat:
4324 /* allocate space in core */ 4322 /* allocate space in core */
4325 ext4_mb_regular_allocator(ac); 4323 *errp = ext4_mb_regular_allocator(ac);
4324 if (*errp)
4325 goto errout;
4326 4326
4327 /* as we've just preallocated more space than 4327 /* as we've just preallocated more space than
4328 * user requested originally, we store allocated 4328 * user requested originally, we store allocated
@@ -4333,7 +4333,7 @@ repeat:
4333 } 4333 }
4334 if (likely(ac->ac_status == AC_STATUS_FOUND)) { 4334 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
4335 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks); 4335 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks);
4336 if (*errp == -EAGAIN) { 4336 if (*errp == -EAGAIN) {
4337 /* 4337 /*
4338 * drop the reference that we took 4338 * drop the reference that we took
4339 * in ext4_mb_use_best_found 4339 * in ext4_mb_use_best_found
@@ -4344,12 +4344,10 @@ repeat:
4344 ac->ac_b_ex.fe_len = 0; 4344 ac->ac_b_ex.fe_len = 0;
4345 ac->ac_status = AC_STATUS_CONTINUE; 4345 ac->ac_status = AC_STATUS_CONTINUE;
4346 goto repeat; 4346 goto repeat;
4347 } else if (*errp) { 4347 } else if (*errp)
4348 errout:
4348 ext4_discard_allocated_blocks(ac); 4349 ext4_discard_allocated_blocks(ac);
4349 ac->ac_b_ex.fe_len = 0; 4350 else {
4350 ar->len = 0;
4351 ext4_mb_show_ac(ac);
4352 } else {
4353 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); 4351 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
4354 ar->len = ac->ac_b_ex.fe_len; 4352 ar->len = ac->ac_b_ex.fe_len;
4355 } 4353 }
@@ -4358,19 +4356,19 @@ repeat:
4358 if (freed) 4356 if (freed)
4359 goto repeat; 4357 goto repeat;
4360 *errp = -ENOSPC; 4358 *errp = -ENOSPC;
4359 }
4360
4361 if (*errp) {
4361 ac->ac_b_ex.fe_len = 0; 4362 ac->ac_b_ex.fe_len = 0;
4362 ar->len = 0; 4363 ar->len = 0;
4363 ext4_mb_show_ac(ac); 4364 ext4_mb_show_ac(ac);
4364 } 4365 }
4365
4366 ext4_mb_release_context(ac); 4366 ext4_mb_release_context(ac);
4367 4367out:
4368out2: 4368 if (ac)
4369 kmem_cache_free(ext4_ac_cachep, ac); 4369 kmem_cache_free(ext4_ac_cachep, ac);
4370out1:
4371 if (inquota && ar->len < inquota) 4370 if (inquota && ar->len < inquota)
4372 dquot_free_block(ar->inode, inquota - ar->len); 4371 dquot_free_block(ar->inode, inquota - ar->len);
4373out3:
4374 if (!ar->len) { 4372 if (!ar->len) {
4375 if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) 4373 if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag)
4376 /* release all the reserved blocks if non delalloc */ 4374 /* release all the reserved blocks if non delalloc */
@@ -4402,6 +4400,7 @@ static noinline_for_stack int
4402ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, 4400ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4403 struct ext4_free_data *new_entry) 4401 struct ext4_free_data *new_entry)
4404{ 4402{
4403 ext4_group_t group = e4b->bd_group;
4405 ext4_grpblk_t block; 4404 ext4_grpblk_t block;
4406 struct ext4_free_data *entry; 4405 struct ext4_free_data *entry;
4407 struct ext4_group_info *db = e4b->bd_info; 4406 struct ext4_group_info *db = e4b->bd_info;
@@ -4434,9 +4433,9 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4434 else if (block >= (entry->start_blk + entry->count)) 4433 else if (block >= (entry->start_blk + entry->count))
4435 n = &(*n)->rb_right; 4434 n = &(*n)->rb_right;
4436 else { 4435 else {
4437 ext4_grp_locked_error(sb, e4b->bd_group, __func__, 4436 ext4_grp_locked_error(sb, group, 0,
4438 "Double free of blocks %d (%d %d)", 4437 ext4_group_first_block_no(sb, group) + block,
4439 block, entry->start_blk, entry->count); 4438 "Block already on to-be-freed list");
4440 return 0; 4439 return 0;
4441 } 4440 }
4442 } 4441 }
@@ -4494,7 +4493,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4494 struct super_block *sb = inode->i_sb; 4493 struct super_block *sb = inode->i_sb;
4495 struct ext4_allocation_context *ac = NULL; 4494 struct ext4_allocation_context *ac = NULL;
4496 struct ext4_group_desc *gdp; 4495 struct ext4_group_desc *gdp;
4497 struct ext4_super_block *es;
4498 unsigned long freed = 0; 4496 unsigned long freed = 0;
4499 unsigned int overflow; 4497 unsigned int overflow;
4500 ext4_grpblk_t bit; 4498 ext4_grpblk_t bit;
@@ -4513,7 +4511,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4513 } 4511 }
4514 4512
4515 sbi = EXT4_SB(sb); 4513 sbi = EXT4_SB(sb);
4516 es = EXT4_SB(sb)->s_es;
4517 if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && 4514 if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
4518 !ext4_data_block_valid(sbi, block, count)) { 4515 !ext4_data_block_valid(sbi, block, count)) {
4519 ext4_error(sb, "Freeing blocks not in datazone - " 4516 ext4_error(sb, "Freeing blocks not in datazone - "
@@ -4647,6 +4644,8 @@ do_more:
4647 mb_clear_bits(bitmap_bh->b_data, bit, count); 4644 mb_clear_bits(bitmap_bh->b_data, bit, count);
4648 mb_free_blocks(inode, &e4b, bit, count); 4645 mb_free_blocks(inode, &e4b, bit, count);
4649 ext4_mb_return_to_preallocation(inode, &e4b, block, count); 4646 ext4_mb_return_to_preallocation(inode, &e4b, block, count);
4647 if (test_opt(sb, DISCARD))
4648 ext4_issue_discard(sb, block_group, bit, count);
4650 } 4649 }
4651 4650
4652 ret = ext4_free_blks_count(sb, gdp) + count; 4651 ret = ext4_free_blks_count(sb, gdp) + count;
@@ -4680,7 +4679,7 @@ do_more:
4680 put_bh(bitmap_bh); 4679 put_bh(bitmap_bh);
4681 goto do_more; 4680 goto do_more;
4682 } 4681 }
4683 sb->s_dirt = 1; 4682 ext4_mark_super_dirty(sb);
4684error_return: 4683error_return:
4685 if (freed) 4684 if (freed)
4686 dquot_free_block(inode, freed); 4685 dquot_free_block(inode, freed);
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 6f3a27ec30bf..1765c2c50a9b 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -376,7 +376,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
376 * We have the extent map build with the tmp inode. 376 * We have the extent map build with the tmp inode.
377 * Now copy the i_data across 377 * Now copy the i_data across
378 */ 378 */
379 ei->i_flags |= EXT4_EXTENTS_FL; 379 ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
380 memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data)); 380 memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data));
381 381
382 /* 382 /*
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 52abfa12762a..5f1ed9fc913c 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -148,17 +148,17 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
148 */ 148 */
149static int 149static int
150mext_check_null_inode(struct inode *inode1, struct inode *inode2, 150mext_check_null_inode(struct inode *inode1, struct inode *inode2,
151 const char *function) 151 const char *function, unsigned int line)
152{ 152{
153 int ret = 0; 153 int ret = 0;
154 154
155 if (inode1 == NULL) { 155 if (inode1 == NULL) {
156 __ext4_error(inode2->i_sb, function, 156 __ext4_error(inode2->i_sb, function, line,
157 "Both inodes should not be NULL: " 157 "Both inodes should not be NULL: "
158 "inode1 NULL inode2 %lu", inode2->i_ino); 158 "inode1 NULL inode2 %lu", inode2->i_ino);
159 ret = -EIO; 159 ret = -EIO;
160 } else if (inode2 == NULL) { 160 } else if (inode2 == NULL) {
161 __ext4_error(inode1->i_sb, function, 161 __ext4_error(inode1->i_sb, function, line,
162 "Both inodes should not be NULL: " 162 "Both inodes should not be NULL: "
163 "inode1 %lu inode2 NULL", inode1->i_ino); 163 "inode1 %lu inode2 NULL", inode1->i_ino);
164 ret = -EIO; 164 ret = -EIO;
@@ -1084,7 +1084,7 @@ mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
1084 1084
1085 BUG_ON(inode1 == NULL && inode2 == NULL); 1085 BUG_ON(inode1 == NULL && inode2 == NULL);
1086 1086
1087 ret = mext_check_null_inode(inode1, inode2, __func__); 1087 ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__);
1088 if (ret < 0) 1088 if (ret < 0)
1089 goto out; 1089 goto out;
1090 1090
@@ -1121,7 +1121,7 @@ mext_inode_double_unlock(struct inode *inode1, struct inode *inode2)
1121 1121
1122 BUG_ON(inode1 == NULL && inode2 == NULL); 1122 BUG_ON(inode1 == NULL && inode2 == NULL);
1123 1123
1124 ret = mext_check_null_inode(inode1, inode2, __func__); 1124 ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__);
1125 if (ret < 0) 1125 if (ret < 0)
1126 goto out; 1126 goto out;
1127 1127
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index a43e6617b351..314c0d3b3fa9 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -179,30 +179,6 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
179static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, 179static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
180 struct inode *inode); 180 struct inode *inode);
181 181
182unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
183{
184 unsigned len = le16_to_cpu(dlen);
185
186 if (len == EXT4_MAX_REC_LEN || len == 0)
187 return blocksize;
188 return (len & 65532) | ((len & 3) << 16);
189}
190
191__le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
192{
193 if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3))
194 BUG();
195 if (len < 65536)
196 return cpu_to_le16(len);
197 if (len == blocksize) {
198 if (blocksize == 65536)
199 return cpu_to_le16(EXT4_MAX_REC_LEN);
200 else
201 return cpu_to_le16(0);
202 }
203 return cpu_to_le16((len & 65532) | ((len >> 16) & 3));
204}
205
206/* 182/*
207 * p is at least 6 bytes before the end of page 183 * p is at least 6 bytes before the end of page
208 */ 184 */
@@ -605,7 +581,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
605 dir->i_sb->s_blocksize - 581 dir->i_sb->s_blocksize -
606 EXT4_DIR_REC_LEN(0)); 582 EXT4_DIR_REC_LEN(0));
607 for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) { 583 for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
608 if (!ext4_check_dir_entry("htree_dirblock_to_tree", dir, de, bh, 584 if (!ext4_check_dir_entry(dir, de, bh,
609 (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) 585 (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
610 +((char *)de - bh->b_data))) { 586 +((char *)de - bh->b_data))) {
611 /* On error, skip the f_pos to the next block. */ 587 /* On error, skip the f_pos to the next block. */
@@ -844,8 +820,7 @@ static inline int search_dirblock(struct buffer_head *bh,
844 if ((char *) de + namelen <= dlimit && 820 if ((char *) de + namelen <= dlimit &&
845 ext4_match (namelen, name, de)) { 821 ext4_match (namelen, name, de)) {
846 /* found a match - just to be sure, do a full check */ 822 /* found a match - just to be sure, do a full check */
847 if (!ext4_check_dir_entry("ext4_find_entry", 823 if (!ext4_check_dir_entry(dir, de, bh, offset))
848 dir, de, bh, offset))
849 return -1; 824 return -1;
850 *res_dir = de; 825 *res_dir = de;
851 return 1; 826 return 1;
@@ -1019,7 +994,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
1019 int off = (block << EXT4_BLOCK_SIZE_BITS(sb)) 994 int off = (block << EXT4_BLOCK_SIZE_BITS(sb))
1020 + ((char *) de - bh->b_data); 995 + ((char *) de - bh->b_data);
1021 996
1022 if (!ext4_check_dir_entry(__func__, dir, de, bh, off)) { 997 if (!ext4_check_dir_entry(dir, de, bh, off)) {
1023 brelse(bh); 998 brelse(bh);
1024 *err = ERR_BAD_DX_DIR; 999 *err = ERR_BAD_DX_DIR;
1025 goto errout; 1000 goto errout;
@@ -1088,7 +1063,6 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
1088struct dentry *ext4_get_parent(struct dentry *child) 1063struct dentry *ext4_get_parent(struct dentry *child)
1089{ 1064{
1090 __u32 ino; 1065 __u32 ino;
1091 struct inode *inode;
1092 static const struct qstr dotdot = { 1066 static const struct qstr dotdot = {
1093 .name = "..", 1067 .name = "..",
1094 .len = 2, 1068 .len = 2,
@@ -1097,7 +1071,6 @@ struct dentry *ext4_get_parent(struct dentry *child)
1097 struct buffer_head *bh; 1071 struct buffer_head *bh;
1098 1072
1099 bh = ext4_find_entry(child->d_inode, &dotdot, &de); 1073 bh = ext4_find_entry(child->d_inode, &dotdot, &de);
1100 inode = NULL;
1101 if (!bh) 1074 if (!bh)
1102 return ERR_PTR(-ENOENT); 1075 return ERR_PTR(-ENOENT);
1103 ino = le32_to_cpu(de->inode); 1076 ino = le32_to_cpu(de->inode);
@@ -1305,8 +1278,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1305 de = (struct ext4_dir_entry_2 *)bh->b_data; 1278 de = (struct ext4_dir_entry_2 *)bh->b_data;
1306 top = bh->b_data + blocksize - reclen; 1279 top = bh->b_data + blocksize - reclen;
1307 while ((char *) de <= top) { 1280 while ((char *) de <= top) {
1308 if (!ext4_check_dir_entry("ext4_add_entry", dir, de, 1281 if (!ext4_check_dir_entry(dir, de, bh, offset))
1309 bh, offset))
1310 return -EIO; 1282 return -EIO;
1311 if (ext4_match(namelen, name, de)) 1283 if (ext4_match(namelen, name, de))
1312 return -EEXIST; 1284 return -EEXIST;
@@ -1673,7 +1645,7 @@ static int ext4_delete_entry(handle_t *handle,
1673 pde = NULL; 1645 pde = NULL;
1674 de = (struct ext4_dir_entry_2 *) bh->b_data; 1646 de = (struct ext4_dir_entry_2 *) bh->b_data;
1675 while (i < bh->b_size) { 1647 while (i < bh->b_size) {
1676 if (!ext4_check_dir_entry("ext4_delete_entry", dir, de, bh, i)) 1648 if (!ext4_check_dir_entry(dir, de, bh, i))
1677 return -EIO; 1649 return -EIO;
1678 if (de == de_del) { 1650 if (de == de_del) {
1679 BUFFER_TRACE(bh, "get_write_access"); 1651 BUFFER_TRACE(bh, "get_write_access");
@@ -1956,7 +1928,7 @@ static int empty_dir(struct inode *inode)
1956 } 1928 }
1957 de = (struct ext4_dir_entry_2 *) bh->b_data; 1929 de = (struct ext4_dir_entry_2 *) bh->b_data;
1958 } 1930 }
1959 if (!ext4_check_dir_entry("empty_dir", inode, de, bh, offset)) { 1931 if (!ext4_check_dir_entry(inode, de, bh, offset)) {
1960 de = (struct ext4_dir_entry_2 *)(bh->b_data + 1932 de = (struct ext4_dir_entry_2 *)(bh->b_data +
1961 sb->s_blocksize); 1933 sb->s_blocksize);
1962 offset = (offset | (sb->s_blocksize - 1)) + 1; 1934 offset = (offset | (sb->s_blocksize - 1)) + 1;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 6df797eb9aeb..ca5c8aa00a2f 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -921,8 +921,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
921 &sbi->s_flex_groups[flex_group].free_inodes); 921 &sbi->s_flex_groups[flex_group].free_inodes);
922 } 922 }
923 923
924 ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); 924 ext4_handle_dirty_super(handle, sb);
925 sb->s_dirt = 1;
926 925
927exit_journal: 926exit_journal:
928 mutex_unlock(&sbi->s_resize_lock); 927 mutex_unlock(&sbi->s_resize_lock);
@@ -953,7 +952,6 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
953 ext4_fsblk_t n_blocks_count) 952 ext4_fsblk_t n_blocks_count)
954{ 953{
955 ext4_fsblk_t o_blocks_count; 954 ext4_fsblk_t o_blocks_count;
956 ext4_group_t o_groups_count;
957 ext4_grpblk_t last; 955 ext4_grpblk_t last;
958 ext4_grpblk_t add; 956 ext4_grpblk_t add;
959 struct buffer_head *bh; 957 struct buffer_head *bh;
@@ -965,7 +963,6 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
965 * yet: we're going to revalidate es->s_blocks_count after 963 * yet: we're going to revalidate es->s_blocks_count after
966 * taking the s_resize_lock below. */ 964 * taking the s_resize_lock below. */
967 o_blocks_count = ext4_blocks_count(es); 965 o_blocks_count = ext4_blocks_count(es);
968 o_groups_count = EXT4_SB(sb)->s_groups_count;
969 966
970 if (test_opt(sb, DEBUG)) 967 if (test_opt(sb, DEBUG))
971 printk(KERN_DEBUG "EXT4-fs: extending last group from %llu uto %llu blocks\n", 968 printk(KERN_DEBUG "EXT4-fs: extending last group from %llu uto %llu blocks\n",
@@ -1045,13 +1042,12 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1045 goto exit_put; 1042 goto exit_put;
1046 } 1043 }
1047 ext4_blocks_count_set(es, o_blocks_count + add); 1044 ext4_blocks_count_set(es, o_blocks_count + add);
1048 ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
1049 sb->s_dirt = 1;
1050 mutex_unlock(&EXT4_SB(sb)->s_resize_lock); 1045 mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
1051 ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, 1046 ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
1052 o_blocks_count + add); 1047 o_blocks_count + add);
1053 /* We add the blocks to the bitmap and set the group need init bit */ 1048 /* We add the blocks to the bitmap and set the group need init bit */
1054 ext4_add_groupblocks(handle, sb, o_blocks_count, add); 1049 ext4_add_groupblocks(handle, sb, o_blocks_count, add);
1050 ext4_handle_dirty_super(handle, sb);
1055 ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, 1051 ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
1056 o_blocks_count + add); 1052 o_blocks_count + add);
1057 if ((err = ext4_journal_stop(handle))) 1053 if ((err = ext4_journal_stop(handle)))
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4e8983a9811b..26147746c272 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -241,14 +241,14 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
241 if (sb->s_flags & MS_RDONLY) 241 if (sb->s_flags & MS_RDONLY)
242 return ERR_PTR(-EROFS); 242 return ERR_PTR(-EROFS);
243 243
244 vfs_check_frozen(sb, SB_FREEZE_WRITE); 244 vfs_check_frozen(sb, SB_FREEZE_TRANS);
245 /* Special case here: if the journal has aborted behind our 245 /* Special case here: if the journal has aborted behind our
246 * backs (eg. EIO in the commit thread), then we still need to 246 * backs (eg. EIO in the commit thread), then we still need to
247 * take the FS itself readonly cleanly. */ 247 * take the FS itself readonly cleanly. */
248 journal = EXT4_SB(sb)->s_journal; 248 journal = EXT4_SB(sb)->s_journal;
249 if (journal) { 249 if (journal) {
250 if (is_journal_aborted(journal)) { 250 if (is_journal_aborted(journal)) {
251 ext4_abort(sb, __func__, "Detected aborted journal"); 251 ext4_abort(sb, "Detected aborted journal");
252 return ERR_PTR(-EROFS); 252 return ERR_PTR(-EROFS);
253 } 253 }
254 return jbd2_journal_start(journal, nblocks); 254 return jbd2_journal_start(journal, nblocks);
@@ -262,7 +262,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
262 * that sync() will call the filesystem's write_super callback if 262 * that sync() will call the filesystem's write_super callback if
263 * appropriate. 263 * appropriate.
264 */ 264 */
265int __ext4_journal_stop(const char *where, handle_t *handle) 265int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
266{ 266{
267 struct super_block *sb; 267 struct super_block *sb;
268 int err; 268 int err;
@@ -279,12 +279,13 @@ int __ext4_journal_stop(const char *where, handle_t *handle)
279 if (!err) 279 if (!err)
280 err = rc; 280 err = rc;
281 if (err) 281 if (err)
282 __ext4_std_error(sb, where, err); 282 __ext4_std_error(sb, where, line, err);
283 return err; 283 return err;
284} 284}
285 285
286void ext4_journal_abort_handle(const char *caller, const char *err_fn, 286void ext4_journal_abort_handle(const char *caller, unsigned int line,
287 struct buffer_head *bh, handle_t *handle, int err) 287 const char *err_fn, struct buffer_head *bh,
288 handle_t *handle, int err)
288{ 289{
289 char nbuf[16]; 290 char nbuf[16];
290 const char *errstr = ext4_decode_error(NULL, err, nbuf); 291 const char *errstr = ext4_decode_error(NULL, err, nbuf);
@@ -300,12 +301,47 @@ void ext4_journal_abort_handle(const char *caller, const char *err_fn,
300 if (is_handle_aborted(handle)) 301 if (is_handle_aborted(handle))
301 return; 302 return;
302 303
303 printk(KERN_ERR "%s: aborting transaction: %s in %s\n", 304 printk(KERN_ERR "%s:%d: aborting transaction: %s in %s\n",
304 caller, errstr, err_fn); 305 caller, line, errstr, err_fn);
305 306
306 jbd2_journal_abort_handle(handle); 307 jbd2_journal_abort_handle(handle);
307} 308}
308 309
310static void __save_error_info(struct super_block *sb, const char *func,
311 unsigned int line)
312{
313 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
314
315 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
316 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
317 es->s_last_error_time = cpu_to_le32(get_seconds());
318 strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
319 es->s_last_error_line = cpu_to_le32(line);
320 if (!es->s_first_error_time) {
321 es->s_first_error_time = es->s_last_error_time;
322 strncpy(es->s_first_error_func, func,
323 sizeof(es->s_first_error_func));
324 es->s_first_error_line = cpu_to_le32(line);
325 es->s_first_error_ino = es->s_last_error_ino;
326 es->s_first_error_block = es->s_last_error_block;
327 }
328 /*
329 * Start the daily error reporting function if it hasn't been
330 * started already
331 */
332 if (!es->s_error_count)
333 mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
334 es->s_error_count = cpu_to_le32(le32_to_cpu(es->s_error_count) + 1);
335}
336
337static void save_error_info(struct super_block *sb, const char *func,
338 unsigned int line)
339{
340 __save_error_info(sb, func, line);
341 ext4_commit_super(sb, 1);
342}
343
344
309/* Deal with the reporting of failure conditions on a filesystem such as 345/* Deal with the reporting of failure conditions on a filesystem such as
310 * inconsistencies detected or read IO failures. 346 * inconsistencies detected or read IO failures.
311 * 347 *
@@ -323,11 +359,6 @@ void ext4_journal_abort_handle(const char *caller, const char *err_fn,
323 359
324static void ext4_handle_error(struct super_block *sb) 360static void ext4_handle_error(struct super_block *sb)
325{ 361{
326 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
327
328 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
329 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
330
331 if (sb->s_flags & MS_RDONLY) 362 if (sb->s_flags & MS_RDONLY)
332 return; 363 return;
333 364
@@ -342,19 +373,19 @@ static void ext4_handle_error(struct super_block *sb)
342 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); 373 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
343 sb->s_flags |= MS_RDONLY; 374 sb->s_flags |= MS_RDONLY;
344 } 375 }
345 ext4_commit_super(sb, 1);
346 if (test_opt(sb, ERRORS_PANIC)) 376 if (test_opt(sb, ERRORS_PANIC))
347 panic("EXT4-fs (device %s): panic forced after error\n", 377 panic("EXT4-fs (device %s): panic forced after error\n",
348 sb->s_id); 378 sb->s_id);
349} 379}
350 380
351void __ext4_error(struct super_block *sb, const char *function, 381void __ext4_error(struct super_block *sb, const char *function,
352 const char *fmt, ...) 382 unsigned int line, const char *fmt, ...)
353{ 383{
354 va_list args; 384 va_list args;
355 385
356 va_start(args, fmt); 386 va_start(args, fmt);
357 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); 387 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: ",
388 sb->s_id, function, line, current->comm);
358 vprintk(fmt, args); 389 vprintk(fmt, args);
359 printk("\n"); 390 printk("\n");
360 va_end(args); 391 va_end(args);
@@ -362,14 +393,22 @@ void __ext4_error(struct super_block *sb, const char *function,
362 ext4_handle_error(sb); 393 ext4_handle_error(sb);
363} 394}
364 395
365void ext4_error_inode(const char *function, struct inode *inode, 396void ext4_error_inode(struct inode *inode, const char *function,
397 unsigned int line, ext4_fsblk_t block,
366 const char *fmt, ...) 398 const char *fmt, ...)
367{ 399{
368 va_list args; 400 va_list args;
401 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
369 402
403 es->s_last_error_ino = cpu_to_le32(inode->i_ino);
404 es->s_last_error_block = cpu_to_le64(block);
405 save_error_info(inode->i_sb, function, line);
370 va_start(args, fmt); 406 va_start(args, fmt);
371 printk(KERN_CRIT "EXT4-fs error (device %s): %s: inode #%lu: (comm %s) ", 407 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ",
372 inode->i_sb->s_id, function, inode->i_ino, current->comm); 408 inode->i_sb->s_id, function, line, inode->i_ino);
409 if (block)
410 printk("block %llu: ", block);
411 printk("comm %s: ", current->comm);
373 vprintk(fmt, args); 412 vprintk(fmt, args);
374 printk("\n"); 413 printk("\n");
375 va_end(args); 414 va_end(args);
@@ -377,20 +416,26 @@ void ext4_error_inode(const char *function, struct inode *inode,
377 ext4_handle_error(inode->i_sb); 416 ext4_handle_error(inode->i_sb);
378} 417}
379 418
380void ext4_error_file(const char *function, struct file *file, 419void ext4_error_file(struct file *file, const char *function,
381 const char *fmt, ...) 420 unsigned int line, const char *fmt, ...)
382{ 421{
383 va_list args; 422 va_list args;
423 struct ext4_super_block *es;
384 struct inode *inode = file->f_dentry->d_inode; 424 struct inode *inode = file->f_dentry->d_inode;
385 char pathname[80], *path; 425 char pathname[80], *path;
386 426
427 es = EXT4_SB(inode->i_sb)->s_es;
428 es->s_last_error_ino = cpu_to_le32(inode->i_ino);
429 save_error_info(inode->i_sb, function, line);
387 va_start(args, fmt); 430 va_start(args, fmt);
388 path = d_path(&(file->f_path), pathname, sizeof(pathname)); 431 path = d_path(&(file->f_path), pathname, sizeof(pathname));
389 if (!path) 432 if (!path)
390 path = "(unknown)"; 433 path = "(unknown)";
391 printk(KERN_CRIT 434 printk(KERN_CRIT
392 "EXT4-fs error (device %s): %s: inode #%lu (comm %s path %s): ", 435 "EXT4-fs error (device %s): %s:%d: inode #%lu "
393 inode->i_sb->s_id, function, inode->i_ino, current->comm, path); 436 "(comm %s path %s): ",
437 inode->i_sb->s_id, function, line, inode->i_ino,
438 current->comm, path);
394 vprintk(fmt, args); 439 vprintk(fmt, args);
395 printk("\n"); 440 printk("\n");
396 va_end(args); 441 va_end(args);
@@ -435,7 +480,8 @@ static const char *ext4_decode_error(struct super_block *sb, int errno,
435/* __ext4_std_error decodes expected errors from journaling functions 480/* __ext4_std_error decodes expected errors from journaling functions
436 * automatically and invokes the appropriate error response. */ 481 * automatically and invokes the appropriate error response. */
437 482
438void __ext4_std_error(struct super_block *sb, const char *function, int errno) 483void __ext4_std_error(struct super_block *sb, const char *function,
484 unsigned int line, int errno)
439{ 485{
440 char nbuf[16]; 486 char nbuf[16];
441 const char *errstr; 487 const char *errstr;
@@ -448,8 +494,9 @@ void __ext4_std_error(struct super_block *sb, const char *function, int errno)
448 return; 494 return;
449 495
450 errstr = ext4_decode_error(sb, errno, nbuf); 496 errstr = ext4_decode_error(sb, errno, nbuf);
451 printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n", 497 printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
452 sb->s_id, function, errstr); 498 sb->s_id, function, line, errstr);
499 save_error_info(sb, function, line);
453 500
454 ext4_handle_error(sb); 501 ext4_handle_error(sb);
455} 502}
@@ -464,29 +511,29 @@ void __ext4_std_error(struct super_block *sb, const char *function, int errno)
464 * case we take the easy way out and panic immediately. 511 * case we take the easy way out and panic immediately.
465 */ 512 */
466 513
467void ext4_abort(struct super_block *sb, const char *function, 514void __ext4_abort(struct super_block *sb, const char *function,
468 const char *fmt, ...) 515 unsigned int line, const char *fmt, ...)
469{ 516{
470 va_list args; 517 va_list args;
471 518
519 save_error_info(sb, function, line);
472 va_start(args, fmt); 520 va_start(args, fmt);
473 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); 521 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id,
522 function, line);
474 vprintk(fmt, args); 523 vprintk(fmt, args);
475 printk("\n"); 524 printk("\n");
476 va_end(args); 525 va_end(args);
477 526
527 if ((sb->s_flags & MS_RDONLY) == 0) {
528 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
529 sb->s_flags |= MS_RDONLY;
530 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
531 if (EXT4_SB(sb)->s_journal)
532 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
533 save_error_info(sb, function, line);
534 }
478 if (test_opt(sb, ERRORS_PANIC)) 535 if (test_opt(sb, ERRORS_PANIC))
479 panic("EXT4-fs panic from previous error\n"); 536 panic("EXT4-fs panic from previous error\n");
480
481 if (sb->s_flags & MS_RDONLY)
482 return;
483
484 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
485 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
486 sb->s_flags |= MS_RDONLY;
487 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
488 if (EXT4_SB(sb)->s_journal)
489 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
490} 537}
491 538
492void ext4_msg (struct super_block * sb, const char *prefix, 539void ext4_msg (struct super_block * sb, const char *prefix,
@@ -502,38 +549,47 @@ void ext4_msg (struct super_block * sb, const char *prefix,
502} 549}
503 550
504void __ext4_warning(struct super_block *sb, const char *function, 551void __ext4_warning(struct super_block *sb, const char *function,
505 const char *fmt, ...) 552 unsigned int line, const char *fmt, ...)
506{ 553{
507 va_list args; 554 va_list args;
508 555
509 va_start(args, fmt); 556 va_start(args, fmt);
510 printk(KERN_WARNING "EXT4-fs warning (device %s): %s: ", 557 printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: ",
511 sb->s_id, function); 558 sb->s_id, function, line);
512 vprintk(fmt, args); 559 vprintk(fmt, args);
513 printk("\n"); 560 printk("\n");
514 va_end(args); 561 va_end(args);
515} 562}
516 563
517void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp, 564void __ext4_grp_locked_error(const char *function, unsigned int line,
518 const char *function, const char *fmt, ...) 565 struct super_block *sb, ext4_group_t grp,
566 unsigned long ino, ext4_fsblk_t block,
567 const char *fmt, ...)
519__releases(bitlock) 568__releases(bitlock)
520__acquires(bitlock) 569__acquires(bitlock)
521{ 570{
522 va_list args; 571 va_list args;
523 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 572 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
524 573
574 es->s_last_error_ino = cpu_to_le32(ino);
575 es->s_last_error_block = cpu_to_le64(block);
576 __save_error_info(sb, function, line);
525 va_start(args, fmt); 577 va_start(args, fmt);
526 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); 578 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u",
579 sb->s_id, function, line, grp);
580 if (ino)
581 printk("inode %lu: ", ino);
582 if (block)
583 printk("block %llu:", (unsigned long long) block);
527 vprintk(fmt, args); 584 vprintk(fmt, args);
528 printk("\n"); 585 printk("\n");
529 va_end(args); 586 va_end(args);
530 587
531 if (test_opt(sb, ERRORS_CONT)) { 588 if (test_opt(sb, ERRORS_CONT)) {
532 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
533 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
534 ext4_commit_super(sb, 0); 589 ext4_commit_super(sb, 0);
535 return; 590 return;
536 } 591 }
592
537 ext4_unlock_group(sb, grp); 593 ext4_unlock_group(sb, grp);
538 ext4_handle_error(sb); 594 ext4_handle_error(sb);
539 /* 595 /*
@@ -660,8 +716,7 @@ static void ext4_put_super(struct super_block *sb)
660 err = jbd2_journal_destroy(sbi->s_journal); 716 err = jbd2_journal_destroy(sbi->s_journal);
661 sbi->s_journal = NULL; 717 sbi->s_journal = NULL;
662 if (err < 0) 718 if (err < 0)
663 ext4_abort(sb, __func__, 719 ext4_abort(sb, "Couldn't clean up the journal");
664 "Couldn't clean up the journal");
665 } 720 }
666 721
667 ext4_release_system_zone(sb); 722 ext4_release_system_zone(sb);
@@ -813,8 +868,10 @@ static void destroy_inodecache(void)
813 kmem_cache_destroy(ext4_inode_cachep); 868 kmem_cache_destroy(ext4_inode_cachep);
814} 869}
815 870
816static void ext4_clear_inode(struct inode *inode) 871void ext4_clear_inode(struct inode *inode)
817{ 872{
873 invalidate_inode_buffers(inode);
874 end_writeback(inode);
818 dquot_drop(inode); 875 dquot_drop(inode);
819 ext4_discard_preallocations(inode); 876 ext4_discard_preallocations(inode);
820 if (EXT4_JOURNAL(inode)) 877 if (EXT4_JOURNAL(inode))
@@ -946,14 +1003,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
946 seq_puts(seq, ",journal_async_commit"); 1003 seq_puts(seq, ",journal_async_commit");
947 else if (test_opt(sb, JOURNAL_CHECKSUM)) 1004 else if (test_opt(sb, JOURNAL_CHECKSUM))
948 seq_puts(seq, ",journal_checksum"); 1005 seq_puts(seq, ",journal_checksum");
949 if (test_opt(sb, NOBH))
950 seq_puts(seq, ",nobh");
951 if (test_opt(sb, I_VERSION)) 1006 if (test_opt(sb, I_VERSION))
952 seq_puts(seq, ",i_version"); 1007 seq_puts(seq, ",i_version");
953 if (!test_opt(sb, DELALLOC)) 1008 if (!test_opt(sb, DELALLOC) &&
1009 !(def_mount_opts & EXT4_DEFM_NODELALLOC))
954 seq_puts(seq, ",nodelalloc"); 1010 seq_puts(seq, ",nodelalloc");
955 1011
956
957 if (sbi->s_stripe) 1012 if (sbi->s_stripe)
958 seq_printf(seq, ",stripe=%lu", sbi->s_stripe); 1013 seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
959 /* 1014 /*
@@ -977,7 +1032,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
977 if (test_opt(sb, NO_AUTO_DA_ALLOC)) 1032 if (test_opt(sb, NO_AUTO_DA_ALLOC))
978 seq_puts(seq, ",noauto_da_alloc"); 1033 seq_puts(seq, ",noauto_da_alloc");
979 1034
980 if (test_opt(sb, DISCARD)) 1035 if (test_opt(sb, DISCARD) && !(def_mount_opts & EXT4_DEFM_DISCARD))
981 seq_puts(seq, ",discard"); 1036 seq_puts(seq, ",discard");
982 1037
983 if (test_opt(sb, NOLOAD)) 1038 if (test_opt(sb, NOLOAD))
@@ -986,6 +1041,10 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
986 if (test_opt(sb, DIOREAD_NOLOCK)) 1041 if (test_opt(sb, DIOREAD_NOLOCK))
987 seq_puts(seq, ",dioread_nolock"); 1042 seq_puts(seq, ",dioread_nolock");
988 1043
1044 if (test_opt(sb, BLOCK_VALIDITY) &&
1045 !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY))
1046 seq_puts(seq, ",block_validity");
1047
989 ext4_show_quota_options(seq, sb); 1048 ext4_show_quota_options(seq, sb);
990 1049
991 return 0; 1050 return 0;
@@ -1065,6 +1124,7 @@ static int ext4_mark_dquot_dirty(struct dquot *dquot);
1065static int ext4_write_info(struct super_block *sb, int type); 1124static int ext4_write_info(struct super_block *sb, int type);
1066static int ext4_quota_on(struct super_block *sb, int type, int format_id, 1125static int ext4_quota_on(struct super_block *sb, int type, int format_id,
1067 char *path); 1126 char *path);
1127static int ext4_quota_off(struct super_block *sb, int type);
1068static int ext4_quota_on_mount(struct super_block *sb, int type); 1128static int ext4_quota_on_mount(struct super_block *sb, int type);
1069static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 1129static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
1070 size_t len, loff_t off); 1130 size_t len, loff_t off);
@@ -1086,7 +1146,7 @@ static const struct dquot_operations ext4_quota_operations = {
1086 1146
1087static const struct quotactl_ops ext4_qctl_operations = { 1147static const struct quotactl_ops ext4_qctl_operations = {
1088 .quota_on = ext4_quota_on, 1148 .quota_on = ext4_quota_on,
1089 .quota_off = dquot_quota_off, 1149 .quota_off = ext4_quota_off,
1090 .quota_sync = dquot_quota_sync, 1150 .quota_sync = dquot_quota_sync,
1091 .get_info = dquot_get_dqinfo, 1151 .get_info = dquot_get_dqinfo,
1092 .set_info = dquot_set_dqinfo, 1152 .set_info = dquot_set_dqinfo,
@@ -1100,14 +1160,13 @@ static const struct super_operations ext4_sops = {
1100 .destroy_inode = ext4_destroy_inode, 1160 .destroy_inode = ext4_destroy_inode,
1101 .write_inode = ext4_write_inode, 1161 .write_inode = ext4_write_inode,
1102 .dirty_inode = ext4_dirty_inode, 1162 .dirty_inode = ext4_dirty_inode,
1103 .delete_inode = ext4_delete_inode, 1163 .evict_inode = ext4_evict_inode,
1104 .put_super = ext4_put_super, 1164 .put_super = ext4_put_super,
1105 .sync_fs = ext4_sync_fs, 1165 .sync_fs = ext4_sync_fs,
1106 .freeze_fs = ext4_freeze, 1166 .freeze_fs = ext4_freeze,
1107 .unfreeze_fs = ext4_unfreeze, 1167 .unfreeze_fs = ext4_unfreeze,
1108 .statfs = ext4_statfs, 1168 .statfs = ext4_statfs,
1109 .remount_fs = ext4_remount, 1169 .remount_fs = ext4_remount,
1110 .clear_inode = ext4_clear_inode,
1111 .show_options = ext4_show_options, 1170 .show_options = ext4_show_options,
1112#ifdef CONFIG_QUOTA 1171#ifdef CONFIG_QUOTA
1113 .quota_read = ext4_quota_read, 1172 .quota_read = ext4_quota_read,
@@ -1121,12 +1180,11 @@ static const struct super_operations ext4_nojournal_sops = {
1121 .destroy_inode = ext4_destroy_inode, 1180 .destroy_inode = ext4_destroy_inode,
1122 .write_inode = ext4_write_inode, 1181 .write_inode = ext4_write_inode,
1123 .dirty_inode = ext4_dirty_inode, 1182 .dirty_inode = ext4_dirty_inode,
1124 .delete_inode = ext4_delete_inode, 1183 .evict_inode = ext4_evict_inode,
1125 .write_super = ext4_write_super, 1184 .write_super = ext4_write_super,
1126 .put_super = ext4_put_super, 1185 .put_super = ext4_put_super,
1127 .statfs = ext4_statfs, 1186 .statfs = ext4_statfs,
1128 .remount_fs = ext4_remount, 1187 .remount_fs = ext4_remount,
1129 .clear_inode = ext4_clear_inode,
1130 .show_options = ext4_show_options, 1188 .show_options = ext4_show_options,
1131#ifdef CONFIG_QUOTA 1189#ifdef CONFIG_QUOTA
1132 .quota_read = ext4_quota_read, 1190 .quota_read = ext4_quota_read,
@@ -1624,10 +1682,12 @@ set_qf_format:
1624 *n_blocks_count = option; 1682 *n_blocks_count = option;
1625 break; 1683 break;
1626 case Opt_nobh: 1684 case Opt_nobh:
1627 set_opt(sbi->s_mount_opt, NOBH); 1685 ext4_msg(sb, KERN_WARNING,
1686 "Ignoring deprecated nobh option");
1628 break; 1687 break;
1629 case Opt_bh: 1688 case Opt_bh:
1630 clear_opt(sbi->s_mount_opt, NOBH); 1689 ext4_msg(sb, KERN_WARNING,
1690 "Ignoring deprecated bh option");
1631 break; 1691 break;
1632 case Opt_i_version: 1692 case Opt_i_version:
1633 set_opt(sbi->s_mount_opt, I_VERSION); 1693 set_opt(sbi->s_mount_opt, I_VERSION);
@@ -2249,6 +2309,8 @@ static ssize_t session_write_kbytes_show(struct ext4_attr *a,
2249{ 2309{
2250 struct super_block *sb = sbi->s_buddy_cache->i_sb; 2310 struct super_block *sb = sbi->s_buddy_cache->i_sb;
2251 2311
2312 if (!sb->s_bdev->bd_part)
2313 return snprintf(buf, PAGE_SIZE, "0\n");
2252 return snprintf(buf, PAGE_SIZE, "%lu\n", 2314 return snprintf(buf, PAGE_SIZE, "%lu\n",
2253 (part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 2315 (part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2254 sbi->s_sectors_written_start) >> 1); 2316 sbi->s_sectors_written_start) >> 1);
@@ -2259,6 +2321,8 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
2259{ 2321{
2260 struct super_block *sb = sbi->s_buddy_cache->i_sb; 2322 struct super_block *sb = sbi->s_buddy_cache->i_sb;
2261 2323
2324 if (!sb->s_bdev->bd_part)
2325 return snprintf(buf, PAGE_SIZE, "0\n");
2262 return snprintf(buf, PAGE_SIZE, "%llu\n", 2326 return snprintf(buf, PAGE_SIZE, "%llu\n",
2263 (unsigned long long)(sbi->s_kbytes_written + 2327 (unsigned long long)(sbi->s_kbytes_written +
2264 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 2328 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
@@ -2431,6 +2495,53 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
2431 return 1; 2495 return 1;
2432} 2496}
2433 2497
2498/*
2499 * This function is called once a day if we have errors logged
2500 * on the file system
2501 */
2502static void print_daily_error_info(unsigned long arg)
2503{
2504 struct super_block *sb = (struct super_block *) arg;
2505 struct ext4_sb_info *sbi;
2506 struct ext4_super_block *es;
2507
2508 sbi = EXT4_SB(sb);
2509 es = sbi->s_es;
2510
2511 if (es->s_error_count)
2512 ext4_msg(sb, KERN_NOTICE, "error count: %u",
2513 le32_to_cpu(es->s_error_count));
2514 if (es->s_first_error_time) {
2515 printk(KERN_NOTICE "EXT4-fs (%s): initial error at %u: %.*s:%d",
2516 sb->s_id, le32_to_cpu(es->s_first_error_time),
2517 (int) sizeof(es->s_first_error_func),
2518 es->s_first_error_func,
2519 le32_to_cpu(es->s_first_error_line));
2520 if (es->s_first_error_ino)
2521 printk(": inode %u",
2522 le32_to_cpu(es->s_first_error_ino));
2523 if (es->s_first_error_block)
2524 printk(": block %llu", (unsigned long long)
2525 le64_to_cpu(es->s_first_error_block));
2526 printk("\n");
2527 }
2528 if (es->s_last_error_time) {
2529 printk(KERN_NOTICE "EXT4-fs (%s): last error at %u: %.*s:%d",
2530 sb->s_id, le32_to_cpu(es->s_last_error_time),
2531 (int) sizeof(es->s_last_error_func),
2532 es->s_last_error_func,
2533 le32_to_cpu(es->s_last_error_line));
2534 if (es->s_last_error_ino)
2535 printk(": inode %u",
2536 le32_to_cpu(es->s_last_error_ino));
2537 if (es->s_last_error_block)
2538 printk(": block %llu", (unsigned long long)
2539 le64_to_cpu(es->s_last_error_block));
2540 printk("\n");
2541 }
2542 mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */
2543}
2544
2434static int ext4_fill_super(struct super_block *sb, void *data, int silent) 2545static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2435 __releases(kernel_lock) 2546 __releases(kernel_lock)
2436 __acquires(kernel_lock) 2547 __acquires(kernel_lock)
@@ -2448,7 +2559,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2448 struct inode *root; 2559 struct inode *root;
2449 char *cp; 2560 char *cp;
2450 const char *descr; 2561 const char *descr;
2451 int ret = -EINVAL; 2562 int ret = -ENOMEM;
2452 int blocksize; 2563 int blocksize;
2453 unsigned int db_count; 2564 unsigned int db_count;
2454 unsigned int i; 2565 unsigned int i;
@@ -2459,13 +2570,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2459 2570
2460 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 2571 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
2461 if (!sbi) 2572 if (!sbi)
2462 return -ENOMEM; 2573 goto out_free_orig;
2463 2574
2464 sbi->s_blockgroup_lock = 2575 sbi->s_blockgroup_lock =
2465 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); 2576 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
2466 if (!sbi->s_blockgroup_lock) { 2577 if (!sbi->s_blockgroup_lock) {
2467 kfree(sbi); 2578 kfree(sbi);
2468 return -ENOMEM; 2579 goto out_free_orig;
2469 } 2580 }
2470 sb->s_fs_info = sbi; 2581 sb->s_fs_info = sbi;
2471 sbi->s_mount_opt = 0; 2582 sbi->s_mount_opt = 0;
@@ -2473,8 +2584,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2473 sbi->s_resgid = EXT4_DEF_RESGID; 2584 sbi->s_resgid = EXT4_DEF_RESGID;
2474 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; 2585 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
2475 sbi->s_sb_block = sb_block; 2586 sbi->s_sb_block = sb_block;
2476 sbi->s_sectors_written_start = part_stat_read(sb->s_bdev->bd_part, 2587 if (sb->s_bdev->bd_part)
2477 sectors[1]); 2588 sbi->s_sectors_written_start =
2589 part_stat_read(sb->s_bdev->bd_part, sectors[1]);
2478 2590
2479 unlock_kernel(); 2591 unlock_kernel();
2480 2592
@@ -2482,6 +2594,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2482 for (cp = sb->s_id; (cp = strchr(cp, '/'));) 2594 for (cp = sb->s_id; (cp = strchr(cp, '/'));)
2483 *cp = '!'; 2595 *cp = '!';
2484 2596
2597 ret = -EINVAL;
2485 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); 2598 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
2486 if (!blocksize) { 2599 if (!blocksize) {
2487 ext4_msg(sb, KERN_ERR, "unable to set blocksize"); 2600 ext4_msg(sb, KERN_ERR, "unable to set blocksize");
@@ -2546,6 +2659,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2546 set_opt(sbi->s_mount_opt, ERRORS_CONT); 2659 set_opt(sbi->s_mount_opt, ERRORS_CONT);
2547 else 2660 else
2548 set_opt(sbi->s_mount_opt, ERRORS_RO); 2661 set_opt(sbi->s_mount_opt, ERRORS_RO);
2662 if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)
2663 set_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
2664 if (def_mount_opts & EXT4_DEFM_DISCARD)
2665 set_opt(sbi->s_mount_opt, DISCARD);
2549 2666
2550 sbi->s_resuid = le16_to_cpu(es->s_def_resuid); 2667 sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
2551 sbi->s_resgid = le16_to_cpu(es->s_def_resgid); 2668 sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
@@ -2553,15 +2670,23 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2553 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; 2670 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
2554 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; 2671 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
2555 2672
2556 set_opt(sbi->s_mount_opt, BARRIER); 2673 if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
2674 set_opt(sbi->s_mount_opt, BARRIER);
2557 2675
2558 /* 2676 /*
2559 * enable delayed allocation by default 2677 * enable delayed allocation by default
2560 * Use -o nodelalloc to turn it off 2678 * Use -o nodelalloc to turn it off
2561 */ 2679 */
2562 if (!IS_EXT3_SB(sb)) 2680 if (!IS_EXT3_SB(sb) &&
2681 ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
2563 set_opt(sbi->s_mount_opt, DELALLOC); 2682 set_opt(sbi->s_mount_opt, DELALLOC);
2564 2683
2684 if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
2685 &journal_devnum, &journal_ioprio, NULL, 0)) {
2686 ext4_msg(sb, KERN_WARNING,
2687 "failed to parse options in superblock: %s",
2688 sbi->s_es->s_mount_opts);
2689 }
2565 if (!parse_options((char *) data, sb, &journal_devnum, 2690 if (!parse_options((char *) data, sb, &journal_devnum,
2566 &journal_ioprio, NULL, 0)) 2691 &journal_ioprio, NULL, 0))
2567 goto failed_mount; 2692 goto failed_mount;
@@ -2912,18 +3037,7 @@ no_journal:
2912 ext4_msg(sb, KERN_ERR, "insufficient memory"); 3037 ext4_msg(sb, KERN_ERR, "insufficient memory");
2913 goto failed_mount_wq; 3038 goto failed_mount_wq;
2914 } 3039 }
2915 if (test_opt(sb, NOBH)) { 3040
2916 if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
2917 ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - "
2918 "its supported only with writeback mode");
2919 clear_opt(sbi->s_mount_opt, NOBH);
2920 }
2921 if (test_opt(sb, DIOREAD_NOLOCK)) {
2922 ext4_msg(sb, KERN_WARNING, "dioread_nolock option is "
2923 "not supported with nobh mode");
2924 goto failed_mount_wq;
2925 }
2926 }
2927 EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); 3041 EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten");
2928 if (!EXT4_SB(sb)->dio_unwritten_wq) { 3042 if (!EXT4_SB(sb)->dio_unwritten_wq) {
2929 printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); 3043 printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
@@ -3010,7 +3124,7 @@ no_journal:
3010 ext4_ext_init(sb); 3124 ext4_ext_init(sb);
3011 err = ext4_mb_init(sb, needs_recovery); 3125 err = ext4_mb_init(sb, needs_recovery);
3012 if (err) { 3126 if (err) {
3013 ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)", 3127 ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
3014 err); 3128 err);
3015 goto failed_mount4; 3129 goto failed_mount4;
3016 } 3130 }
@@ -3043,7 +3157,14 @@ no_journal:
3043 descr = "out journal"; 3157 descr = "out journal";
3044 3158
3045 ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " 3159 ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
3046 "Opts: %s", descr, orig_data); 3160 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
3161 *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
3162
3163 init_timer(&sbi->s_err_report);
3164 sbi->s_err_report.function = print_daily_error_info;
3165 sbi->s_err_report.data = (unsigned long) sb;
3166 if (es->s_error_count)
3167 mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
3047 3168
3048 lock_kernel(); 3169 lock_kernel();
3049 kfree(orig_data); 3170 kfree(orig_data);
@@ -3093,6 +3214,7 @@ out_fail:
3093 kfree(sbi->s_blockgroup_lock); 3214 kfree(sbi->s_blockgroup_lock);
3094 kfree(sbi); 3215 kfree(sbi);
3095 lock_kernel(); 3216 lock_kernel();
3217out_free_orig:
3096 kfree(orig_data); 3218 kfree(orig_data);
3097 return ret; 3219 return ret;
3098} 3220}
@@ -3110,7 +3232,7 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
3110 journal->j_min_batch_time = sbi->s_min_batch_time; 3232 journal->j_min_batch_time = sbi->s_min_batch_time;
3111 journal->j_max_batch_time = sbi->s_max_batch_time; 3233 journal->j_max_batch_time = sbi->s_max_batch_time;
3112 3234
3113 spin_lock(&journal->j_state_lock); 3235 write_lock(&journal->j_state_lock);
3114 if (test_opt(sb, BARRIER)) 3236 if (test_opt(sb, BARRIER))
3115 journal->j_flags |= JBD2_BARRIER; 3237 journal->j_flags |= JBD2_BARRIER;
3116 else 3238 else
@@ -3119,7 +3241,7 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
3119 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; 3241 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
3120 else 3242 else
3121 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; 3243 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
3122 spin_unlock(&journal->j_state_lock); 3244 write_unlock(&journal->j_state_lock);
3123} 3245}
3124 3246
3125static journal_t *ext4_get_journal(struct super_block *sb, 3247static journal_t *ext4_get_journal(struct super_block *sb,
@@ -3327,8 +3449,17 @@ static int ext4_load_journal(struct super_block *sb,
3327 3449
3328 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) 3450 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
3329 err = jbd2_journal_wipe(journal, !really_read_only); 3451 err = jbd2_journal_wipe(journal, !really_read_only);
3330 if (!err) 3452 if (!err) {
3453 char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
3454 if (save)
3455 memcpy(save, ((char *) es) +
3456 EXT4_S_ERR_START, EXT4_S_ERR_LEN);
3331 err = jbd2_journal_load(journal); 3457 err = jbd2_journal_load(journal);
3458 if (save)
3459 memcpy(((char *) es) + EXT4_S_ERR_START,
3460 save, EXT4_S_ERR_LEN);
3461 kfree(save);
3462 }
3332 3463
3333 if (err) { 3464 if (err) {
3334 ext4_msg(sb, KERN_ERR, "error loading journal"); 3465 ext4_msg(sb, KERN_ERR, "error loading journal");
@@ -3384,10 +3515,14 @@ static int ext4_commit_super(struct super_block *sb, int sync)
3384 */ 3515 */
3385 if (!(sb->s_flags & MS_RDONLY)) 3516 if (!(sb->s_flags & MS_RDONLY))
3386 es->s_wtime = cpu_to_le32(get_seconds()); 3517 es->s_wtime = cpu_to_le32(get_seconds());
3387 es->s_kbytes_written = 3518 if (sb->s_bdev->bd_part)
3388 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + 3519 es->s_kbytes_written =
3520 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
3389 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 3521 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
3390 EXT4_SB(sb)->s_sectors_written_start) >> 1)); 3522 EXT4_SB(sb)->s_sectors_written_start) >> 1));
3523 else
3524 es->s_kbytes_written =
3525 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
3391 ext4_free_blocks_count_set(es, percpu_counter_sum_positive( 3526 ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
3392 &EXT4_SB(sb)->s_freeblocks_counter)); 3527 &EXT4_SB(sb)->s_freeblocks_counter));
3393 es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( 3528 es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
@@ -3491,7 +3626,7 @@ int ext4_force_commit(struct super_block *sb)
3491 3626
3492 journal = EXT4_SB(sb)->s_journal; 3627 journal = EXT4_SB(sb)->s_journal;
3493 if (journal) { 3628 if (journal) {
3494 vfs_check_frozen(sb, SB_FREEZE_WRITE); 3629 vfs_check_frozen(sb, SB_FREEZE_TRANS);
3495 ret = ext4_journal_force_commit(journal); 3630 ret = ext4_journal_force_commit(journal);
3496 } 3631 }
3497 3632
@@ -3616,7 +3751,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3616 } 3751 }
3617 3752
3618 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) 3753 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
3619 ext4_abort(sb, __func__, "Abort forced by user"); 3754 ext4_abort(sb, "Abort forced by user");
3620 3755
3621 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 3756 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3622 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); 3757 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
@@ -3981,6 +4116,18 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3981 return err; 4116 return err;
3982} 4117}
3983 4118
4119static int ext4_quota_off(struct super_block *sb, int type)
4120{
4121 /* Force all delayed allocation blocks to be allocated */
4122 if (test_opt(sb, DELALLOC)) {
4123 down_read(&sb->s_umount);
4124 sync_filesystem(sb);
4125 up_read(&sb->s_umount);
4126 }
4127
4128 return dquot_quota_off(sb, type);
4129}
4130
3984/* Read data from quotafile - avoid pagecache and such because we cannot afford 4131/* Read data from quotafile - avoid pagecache and such because we cannot afford
3985 * acquiring the locks... As quota files are never truncated and quota code 4132 * acquiring the locks... As quota files are never truncated and quota code
3986 * itself serializes the operations (and noone else should touch the files) 4133 * itself serializes the operations (and noone else should touch the files)
@@ -4030,7 +4177,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
4030 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 4177 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
4031 int err = 0; 4178 int err = 0;
4032 int offset = off & (sb->s_blocksize - 1); 4179 int offset = off & (sb->s_blocksize - 1);
4033 int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL;
4034 struct buffer_head *bh; 4180 struct buffer_head *bh;
4035 handle_t *handle = journal_current_handle(); 4181 handle_t *handle = journal_current_handle();
4036 4182
@@ -4055,24 +4201,16 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
4055 bh = ext4_bread(handle, inode, blk, 1, &err); 4201 bh = ext4_bread(handle, inode, blk, 1, &err);
4056 if (!bh) 4202 if (!bh)
4057 goto out; 4203 goto out;
4058 if (journal_quota) { 4204 err = ext4_journal_get_write_access(handle, bh);
4059 err = ext4_journal_get_write_access(handle, bh); 4205 if (err) {
4060 if (err) { 4206 brelse(bh);
4061 brelse(bh); 4207 goto out;
4062 goto out;
4063 }
4064 } 4208 }
4065 lock_buffer(bh); 4209 lock_buffer(bh);
4066 memcpy(bh->b_data+offset, data, len); 4210 memcpy(bh->b_data+offset, data, len);
4067 flush_dcache_page(bh->b_page); 4211 flush_dcache_page(bh->b_page);
4068 unlock_buffer(bh); 4212 unlock_buffer(bh);
4069 if (journal_quota) 4213 err = ext4_handle_dirty_metadata(handle, NULL, bh);
4070 err = ext4_handle_dirty_metadata(handle, NULL, bh);
4071 else {
4072 /* Always do at least ordered writes for quotas */
4073 err = ext4_jbd2_file_inode(handle, inode);
4074 mark_buffer_dirty(bh);
4075 }
4076 brelse(bh); 4214 brelse(bh);
4077out: 4215out:
4078 if (err) { 4216 if (err) {
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 04338009793a..3a8cd8dff1ad 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -458,8 +458,7 @@ static void ext4_xattr_update_super_block(handle_t *handle,
458 458
459 if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) { 459 if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) {
460 EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR); 460 EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR);
461 sb->s_dirt = 1; 461 ext4_handle_dirty_super(handle, sb);
462 ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
463 } 462 }
464} 463}
465 464
@@ -1418,7 +1417,7 @@ ext4_xattr_cache_insert(struct buffer_head *bh)
1418 ea_bdebug(bh, "out of memory"); 1417 ea_bdebug(bh, "out of memory");
1419 return; 1418 return;
1420 } 1419 }
1421 error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash); 1420 error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
1422 if (error) { 1421 if (error) {
1423 mb_cache_entry_free(ce); 1422 mb_cache_entry_free(ce);
1424 if (error == -EBUSY) { 1423 if (error == -EBUSY) {
@@ -1490,8 +1489,8 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
1490 return NULL; /* never share */ 1489 return NULL; /* never share */
1491 ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); 1490 ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
1492again: 1491again:
1493 ce = mb_cache_entry_find_first(ext4_xattr_cache, 0, 1492 ce = mb_cache_entry_find_first(ext4_xattr_cache, inode->i_sb->s_bdev,
1494 inode->i_sb->s_bdev, hash); 1493 hash);
1495 while (ce) { 1494 while (ce) {
1496 struct buffer_head *bh; 1495 struct buffer_head *bh;
1497 1496
@@ -1515,7 +1514,7 @@ again:
1515 return bh; 1514 return bh;
1516 } 1515 }
1517 brelse(bh); 1516 brelse(bh);
1518 ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); 1517 ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
1519 } 1518 }
1520 return NULL; 1519 return NULL;
1521} 1520}
@@ -1591,9 +1590,7 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header,
1591int __init 1590int __init
1592init_ext4_xattr(void) 1591init_ext4_xattr(void)
1593{ 1592{
1594 ext4_xattr_cache = mb_cache_create("ext4_xattr", NULL, 1593 ext4_xattr_cache = mb_cache_create("ext4_xattr", 6);
1595 sizeof(struct mb_cache_entry) +
1596 sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6);
1597 if (!ext4_xattr_cache) 1594 if (!ext4_xattr_cache)
1598 return -ENOMEM; 1595 return -ENOMEM;
1599 return 0; 1596 return 0;
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 27ac25725954..d75a77f85c28 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -306,7 +306,6 @@ extern long fat_generic_ioctl(struct file *filp, unsigned int cmd,
306extern const struct file_operations fat_file_operations; 306extern const struct file_operations fat_file_operations;
307extern const struct inode_operations fat_file_inode_operations; 307extern const struct inode_operations fat_file_inode_operations;
308extern int fat_setattr(struct dentry * dentry, struct iattr * attr); 308extern int fat_setattr(struct dentry * dentry, struct iattr * attr);
309extern int fat_setsize(struct inode *inode, loff_t offset);
310extern void fat_truncate_blocks(struct inode *inode, loff_t offset); 309extern void fat_truncate_blocks(struct inode *inode, loff_t offset);
311extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, 310extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry,
312 struct kstat *stat); 311 struct kstat *stat);
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 990dfae022e5..7257752b6d5d 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -364,18 +364,6 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
364 return 0; 364 return 0;
365} 365}
366 366
367int fat_setsize(struct inode *inode, loff_t offset)
368{
369 int error;
370
371 error = simple_setsize(inode, offset);
372 if (error)
373 return error;
374 fat_truncate_blocks(inode, offset);
375
376 return error;
377}
378
379#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET) 367#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
380/* valid file mode bits */ 368/* valid file mode bits */
381#define FAT_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXUGO) 369#define FAT_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXUGO)
@@ -387,21 +375,6 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
387 unsigned int ia_valid; 375 unsigned int ia_valid;
388 int error; 376 int error;
389 377
390 /*
391 * Expand the file. Since inode_setattr() updates ->i_size
392 * before calling the ->truncate(), but FAT needs to fill the
393 * hole before it. XXX: this is no longer true with new truncate
394 * sequence.
395 */
396 if (attr->ia_valid & ATTR_SIZE) {
397 if (attr->ia_size > inode->i_size) {
398 error = fat_cont_expand(inode, attr->ia_size);
399 if (error || attr->ia_valid == ATTR_SIZE)
400 goto out;
401 attr->ia_valid &= ~ATTR_SIZE;
402 }
403 }
404
405 /* Check for setting the inode time. */ 378 /* Check for setting the inode time. */
406 ia_valid = attr->ia_valid; 379 ia_valid = attr->ia_valid;
407 if (ia_valid & TIMES_SET_FLAGS) { 380 if (ia_valid & TIMES_SET_FLAGS) {
@@ -417,6 +390,21 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
417 goto out; 390 goto out;
418 } 391 }
419 392
393 /*
394 * Expand the file. Since inode_setattr() updates ->i_size
395 * before calling the ->truncate(), but FAT needs to fill the
396 * hole before it. XXX: this is no longer true with new truncate
397 * sequence.
398 */
399 if (attr->ia_valid & ATTR_SIZE) {
400 if (attr->ia_size > inode->i_size) {
401 error = fat_cont_expand(inode, attr->ia_size);
402 if (error || attr->ia_valid == ATTR_SIZE)
403 goto out;
404 attr->ia_valid &= ~ATTR_SIZE;
405 }
406 }
407
420 if (((attr->ia_valid & ATTR_UID) && 408 if (((attr->ia_valid & ATTR_UID) &&
421 (attr->ia_uid != sbi->options.fs_uid)) || 409 (attr->ia_uid != sbi->options.fs_uid)) ||
422 ((attr->ia_valid & ATTR_GID) && 410 ((attr->ia_valid & ATTR_GID) &&
@@ -441,12 +429,11 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
441 } 429 }
442 430
443 if (attr->ia_valid & ATTR_SIZE) { 431 if (attr->ia_valid & ATTR_SIZE) {
444 error = fat_setsize(inode, attr->ia_size); 432 truncate_setsize(inode, attr->ia_size);
445 if (error) 433 fat_truncate_blocks(inode, attr->ia_size);
446 goto out;
447 } 434 }
448 435
449 generic_setattr(inode, attr); 436 setattr_copy(inode, attr);
450 mark_inode_dirty(inode); 437 mark_inode_dirty(inode);
451out: 438out:
452 return error; 439 return error;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 7bf45aee56d7..830058057d33 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -159,7 +159,7 @@ static int fat_write_begin(struct file *file, struct address_space *mapping,
159 int err; 159 int err;
160 160
161 *pagep = NULL; 161 *pagep = NULL;
162 err = cont_write_begin_newtrunc(file, mapping, pos, len, flags, 162 err = cont_write_begin(file, mapping, pos, len, flags,
163 pagep, fsdata, fat_get_block, 163 pagep, fsdata, fat_get_block,
164 &MSDOS_I(mapping->host)->mmu_private); 164 &MSDOS_I(mapping->host)->mmu_private);
165 if (err < 0) 165 if (err < 0)
@@ -212,8 +212,8 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
212 * FAT need to use the DIO_LOCKING for avoiding the race 212 * FAT need to use the DIO_LOCKING for avoiding the race
213 * condition of fat_get_block() and ->truncate(). 213 * condition of fat_get_block() and ->truncate().
214 */ 214 */
215 ret = blockdev_direct_IO_newtrunc(rw, iocb, inode, inode->i_sb->s_bdev, 215 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
216 iov, offset, nr_segs, fat_get_block, NULL); 216 iov, offset, nr_segs, fat_get_block, NULL);
217 if (ret < 0 && (rw & WRITE)) 217 if (ret < 0 && (rw & WRITE))
218 fat_write_failed(mapping, offset + iov_length(iov, nr_segs)); 218 fat_write_failed(mapping, offset + iov_length(iov, nr_segs));
219 219
@@ -263,7 +263,7 @@ static const struct address_space_operations fat_aops = {
263 * check if the location is still valid and retry if it 263 * check if the location is still valid and retry if it
264 * isn't. Otherwise we do changes. 264 * isn't. Otherwise we do changes.
265 * 5. Spinlock is used to protect hash/unhash/location check/lookup 265 * 5. Spinlock is used to protect hash/unhash/location check/lookup
266 * 6. fat_clear_inode() unhashes the F-d-c entry. 266 * 6. fat_evict_inode() unhashes the F-d-c entry.
267 * 7. lookup() and readdir() do igrab() if they find a F-d-c entry 267 * 7. lookup() and readdir() do igrab() if they find a F-d-c entry
268 * and consider negative result as cache miss. 268 * and consider negative result as cache miss.
269 */ 269 */
@@ -448,16 +448,15 @@ out:
448 448
449EXPORT_SYMBOL_GPL(fat_build_inode); 449EXPORT_SYMBOL_GPL(fat_build_inode);
450 450
451static void fat_delete_inode(struct inode *inode) 451static void fat_evict_inode(struct inode *inode)
452{ 452{
453 truncate_inode_pages(&inode->i_data, 0); 453 truncate_inode_pages(&inode->i_data, 0);
454 inode->i_size = 0; 454 if (!inode->i_nlink) {
455 fat_truncate_blocks(inode, 0); 455 inode->i_size = 0;
456 clear_inode(inode); 456 fat_truncate_blocks(inode, 0);
457} 457 }
458 458 invalidate_inode_buffers(inode);
459static void fat_clear_inode(struct inode *inode) 459 end_writeback(inode);
460{
461 fat_cache_inval_inode(inode); 460 fat_cache_inval_inode(inode);
462 fat_detach(inode); 461 fat_detach(inode);
463} 462}
@@ -674,12 +673,11 @@ static const struct super_operations fat_sops = {
674 .alloc_inode = fat_alloc_inode, 673 .alloc_inode = fat_alloc_inode,
675 .destroy_inode = fat_destroy_inode, 674 .destroy_inode = fat_destroy_inode,
676 .write_inode = fat_write_inode, 675 .write_inode = fat_write_inode,
677 .delete_inode = fat_delete_inode, 676 .evict_inode = fat_evict_inode,
678 .put_super = fat_put_super, 677 .put_super = fat_put_super,
679 .write_super = fat_write_super, 678 .write_super = fat_write_super,
680 .sync_fs = fat_sync_fs, 679 .sync_fs = fat_sync_fs,
681 .statfs = fat_statfs, 680 .statfs = fat_statfs,
682 .clear_inode = fat_clear_inode,
683 .remount_fs = fat_remount, 681 .remount_fs = fat_remount,
684 682
685 .show_options = fat_show_options, 683 .show_options = fat_show_options,
diff --git a/fs/file.c b/fs/file.c
index 34bb7f71d994..cccaead962c2 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -178,7 +178,6 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
178 fdt->open_fds = (fd_set *)data; 178 fdt->open_fds = (fd_set *)data;
179 data += nr / BITS_PER_BYTE; 179 data += nr / BITS_PER_BYTE;
180 fdt->close_on_exec = (fd_set *)data; 180 fdt->close_on_exec = (fd_set *)data;
181 INIT_RCU_HEAD(&fdt->rcu);
182 fdt->next = NULL; 181 fdt->next = NULL;
183 182
184 return fdt; 183 return fdt;
@@ -312,7 +311,6 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
312 new_fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; 311 new_fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init;
313 new_fdt->open_fds = (fd_set *)&newf->open_fds_init; 312 new_fdt->open_fds = (fd_set *)&newf->open_fds_init;
314 new_fdt->fd = &newf->fd_array[0]; 313 new_fdt->fd = &newf->fd_array[0];
315 INIT_RCU_HEAD(&new_fdt->rcu);
316 new_fdt->next = NULL; 314 new_fdt->next = NULL;
317 315
318 spin_lock(&oldf->file_lock); 316 spin_lock(&oldf->file_lock);
@@ -430,7 +428,6 @@ struct files_struct init_files = {
430 .fd = &init_files.fd_array[0], 428 .fd = &init_files.fd_array[0],
431 .close_on_exec = (fd_set *)&init_files.close_on_exec_init, 429 .close_on_exec = (fd_set *)&init_files.close_on_exec_init,
432 .open_fds = (fd_set *)&init_files.open_fds_init, 430 .open_fds = (fd_set *)&init_files.open_fds_init,
433 .rcu = RCU_HEAD_INIT,
434 }, 431 },
435 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), 432 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock),
436}; 433};
diff --git a/fs/freevxfs/vxfs_extern.h b/fs/freevxfs/vxfs_extern.h
index 50ab5eecb99b..881aa3d217f0 100644
--- a/fs/freevxfs/vxfs_extern.h
+++ b/fs/freevxfs/vxfs_extern.h
@@ -63,7 +63,7 @@ extern void vxfs_put_fake_inode(struct inode *);
63extern struct vxfs_inode_info * vxfs_blkiget(struct super_block *, u_long, ino_t); 63extern struct vxfs_inode_info * vxfs_blkiget(struct super_block *, u_long, ino_t);
64extern struct vxfs_inode_info * vxfs_stiget(struct super_block *, ino_t); 64extern struct vxfs_inode_info * vxfs_stiget(struct super_block *, ino_t);
65extern struct inode * vxfs_iget(struct super_block *, ino_t); 65extern struct inode * vxfs_iget(struct super_block *, ino_t);
66extern void vxfs_clear_inode(struct inode *); 66extern void vxfs_evict_inode(struct inode *);
67 67
68/* vxfs_lookup.c */ 68/* vxfs_lookup.c */
69extern const struct inode_operations vxfs_dir_inode_ops; 69extern const struct inode_operations vxfs_dir_inode_ops;
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 03a6ea5e99f7..79d1b4ea13e7 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -337,15 +337,17 @@ vxfs_iget(struct super_block *sbp, ino_t ino)
337} 337}
338 338
339/** 339/**
340 * vxfs_clear_inode - remove inode from main memory 340 * vxfs_evict_inode - remove inode from main memory
341 * @ip: inode to discard. 341 * @ip: inode to discard.
342 * 342 *
343 * Description: 343 * Description:
344 * vxfs_clear_inode() is called on the final iput and frees the private 344 * vxfs_evict_inode() is called on the final iput and frees the private
345 * inode area. 345 * inode area.
346 */ 346 */
347void 347void
348vxfs_clear_inode(struct inode *ip) 348vxfs_evict_inode(struct inode *ip)
349{ 349{
350 truncate_inode_pages(&ip->i_data, 0);
351 end_writeback(ip);
350 kmem_cache_free(vxfs_inode_cachep, ip->i_private); 352 kmem_cache_free(vxfs_inode_cachep, ip->i_private);
351} 353}
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 1e8af939b3e4..dc0c041e85cb 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -61,7 +61,7 @@ static int vxfs_statfs(struct dentry *, struct kstatfs *);
61static int vxfs_remount(struct super_block *, int *, char *); 61static int vxfs_remount(struct super_block *, int *, char *);
62 62
63static const struct super_operations vxfs_super_ops = { 63static const struct super_operations vxfs_super_ops = {
64 .clear_inode = vxfs_clear_inode, 64 .evict_inode = vxfs_evict_inode,
65 .put_super = vxfs_put_super, 65 .put_super = vxfs_put_super,
66 .statfs = vxfs_statfs, 66 .statfs = vxfs_statfs,
67 .remount_fs = vxfs_remount, 67 .remount_fs = vxfs_remount,
@@ -135,7 +135,7 @@ static int vxfs_remount(struct super_block *sb, int *flags, char *data)
135} 135}
136 136
137/** 137/**
138 * vxfs_read_super - read superblock into memory and initalize filesystem 138 * vxfs_read_super - read superblock into memory and initialize filesystem
139 * @sbp: VFS superblock (to fill) 139 * @sbp: VFS superblock (to fill)
140 * @dp: fs private mount data 140 * @dp: fs private mount data
141 * @silent: do not complain loudly when sth is wrong 141 * @silent: do not complain loudly when sth is wrong
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index d5be1693ac93..b7c7586caea1 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -352,7 +352,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
352 352
353 spin_lock(&inode_lock); 353 spin_lock(&inode_lock);
354 inode->i_state &= ~I_SYNC; 354 inode->i_state &= ~I_SYNC;
355 if (!(inode->i_state & (I_FREEING | I_CLEAR))) { 355 if (!(inode->i_state & I_FREEING)) {
356 if ((inode->i_state & I_DIRTY_PAGES) && wbc->for_kupdate) { 356 if ((inode->i_state & I_DIRTY_PAGES) && wbc->for_kupdate) {
357 /* 357 /*
358 * More pages get dirtied by a fast dirtier. 358 * More pages get dirtied by a fast dirtier.
@@ -499,7 +499,7 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
499 if (inode_dirtied_after(inode, wbc->wb_start)) 499 if (inode_dirtied_after(inode, wbc->wb_start))
500 return 1; 500 return 1;
501 501
502 BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); 502 BUG_ON(inode->i_state & I_FREEING);
503 __iget(inode); 503 __iget(inode);
504 pages_skipped = wbc->pages_skipped; 504 pages_skipped = wbc->pages_skipped;
505 writeback_single_inode(inode, wbc); 505 writeback_single_inode(inode, wbc);
@@ -530,7 +530,8 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
530{ 530{
531 int ret = 0; 531 int ret = 0;
532 532
533 wbc->wb_start = jiffies; /* livelock avoidance */ 533 if (!wbc->wb_start)
534 wbc->wb_start = jiffies; /* livelock avoidance */
534 spin_lock(&inode_lock); 535 spin_lock(&inode_lock);
535 if (!wbc->for_kupdate || list_empty(&wb->b_io)) 536 if (!wbc->for_kupdate || list_empty(&wb->b_io))
536 queue_io(wb, wbc->older_than_this); 537 queue_io(wb, wbc->older_than_this);
@@ -559,7 +560,6 @@ static void __writeback_inodes_sb(struct super_block *sb,
559{ 560{
560 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 561 WARN_ON(!rwsem_is_locked(&sb->s_umount));
561 562
562 wbc->wb_start = jiffies; /* livelock avoidance */
563 spin_lock(&inode_lock); 563 spin_lock(&inode_lock);
564 if (!wbc->for_kupdate || list_empty(&wb->b_io)) 564 if (!wbc->for_kupdate || list_empty(&wb->b_io))
565 queue_io(wb, wbc->older_than_this); 565 queue_io(wb, wbc->older_than_this);
@@ -625,6 +625,7 @@ static long wb_writeback(struct bdi_writeback *wb,
625 wbc.range_end = LLONG_MAX; 625 wbc.range_end = LLONG_MAX;
626 } 626 }
627 627
628 wbc.wb_start = jiffies; /* livelock avoidance */
628 for (;;) { 629 for (;;) {
629 /* 630 /*
630 * Stop writeback when nr_pages has been consumed 631 * Stop writeback when nr_pages has been consumed
@@ -935,7 +936,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
935 if (hlist_unhashed(&inode->i_hash)) 936 if (hlist_unhashed(&inode->i_hash))
936 goto out; 937 goto out;
937 } 938 }
938 if (inode->i_state & (I_FREEING|I_CLEAR)) 939 if (inode->i_state & I_FREEING)
939 goto out; 940 goto out;
940 941
941 /* 942 /*
@@ -1001,7 +1002,7 @@ static void wait_sb_inodes(struct super_block *sb)
1001 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 1002 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
1002 struct address_space *mapping; 1003 struct address_space *mapping;
1003 1004
1004 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) 1005 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
1005 continue; 1006 continue;
1006 mapping = inode->i_mapping; 1007 mapping = inode->i_mapping;
1007 if (mapping->nrpages == 0) 1008 if (mapping->nrpages == 0)
diff --git a/fs/fscache/Kconfig b/fs/fscache/Kconfig
index cc94bb9563f2..3f6dfa989881 100644
--- a/fs/fscache/Kconfig
+++ b/fs/fscache/Kconfig
@@ -1,7 +1,6 @@
1 1
2config FSCACHE 2config FSCACHE
3 tristate "General filesystem local caching manager" 3 tristate "General filesystem local caching manager"
4 select SLOW_WORK
5 help 4 help
6 This option enables a generic filesystem caching manager that can be 5 This option enables a generic filesystem caching manager that can be
7 used by various network and other filesystems to cache data locally. 6 used by various network and other filesystems to cache data locally.
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index edd7434ab6e5..6a026441c5a6 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -82,6 +82,14 @@ extern unsigned fscache_defer_lookup;
82extern unsigned fscache_defer_create; 82extern unsigned fscache_defer_create;
83extern unsigned fscache_debug; 83extern unsigned fscache_debug;
84extern struct kobject *fscache_root; 84extern struct kobject *fscache_root;
85extern struct workqueue_struct *fscache_object_wq;
86extern struct workqueue_struct *fscache_op_wq;
87DECLARE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait);
88
89static inline bool fscache_object_congested(void)
90{
91 return workqueue_congested(WORK_CPU_UNBOUND, fscache_object_wq);
92}
85 93
86extern int fscache_wait_bit(void *); 94extern int fscache_wait_bit(void *);
87extern int fscache_wait_bit_interruptible(void *); 95extern int fscache_wait_bit_interruptible(void *);
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index add6bdb53f04..f9d856773f79 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -15,6 +15,7 @@
15#include <linux/sched.h> 15#include <linux/sched.h>
16#include <linux/completion.h> 16#include <linux/completion.h>
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/seq_file.h>
18#include "internal.h" 19#include "internal.h"
19 20
20MODULE_DESCRIPTION("FS Cache Manager"); 21MODULE_DESCRIPTION("FS Cache Manager");
@@ -40,22 +41,105 @@ MODULE_PARM_DESC(fscache_debug,
40 "FS-Cache debugging mask"); 41 "FS-Cache debugging mask");
41 42
42struct kobject *fscache_root; 43struct kobject *fscache_root;
44struct workqueue_struct *fscache_object_wq;
45struct workqueue_struct *fscache_op_wq;
46
47DEFINE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait);
48
49/* these values serve as lower bounds, will be adjusted in fscache_init() */
50static unsigned fscache_object_max_active = 4;
51static unsigned fscache_op_max_active = 2;
52
53#ifdef CONFIG_SYSCTL
54static struct ctl_table_header *fscache_sysctl_header;
55
56static int fscache_max_active_sysctl(struct ctl_table *table, int write,
57 void __user *buffer,
58 size_t *lenp, loff_t *ppos)
59{
60 struct workqueue_struct **wqp = table->extra1;
61 unsigned int *datap = table->data;
62 int ret;
63
64 ret = proc_dointvec(table, write, buffer, lenp, ppos);
65 if (ret == 0)
66 workqueue_set_max_active(*wqp, *datap);
67 return ret;
68}
69
70ctl_table fscache_sysctls[] = {
71 {
72 .procname = "object_max_active",
73 .data = &fscache_object_max_active,
74 .maxlen = sizeof(unsigned),
75 .mode = 0644,
76 .proc_handler = fscache_max_active_sysctl,
77 .extra1 = &fscache_object_wq,
78 },
79 {
80 .procname = "operation_max_active",
81 .data = &fscache_op_max_active,
82 .maxlen = sizeof(unsigned),
83 .mode = 0644,
84 .proc_handler = fscache_max_active_sysctl,
85 .extra1 = &fscache_op_wq,
86 },
87 {}
88};
89
90ctl_table fscache_sysctls_root[] = {
91 {
92 .procname = "fscache",
93 .mode = 0555,
94 .child = fscache_sysctls,
95 },
96 {}
97};
98#endif
43 99
44/* 100/*
45 * initialise the fs caching module 101 * initialise the fs caching module
46 */ 102 */
47static int __init fscache_init(void) 103static int __init fscache_init(void)
48{ 104{
105 unsigned int nr_cpus = num_possible_cpus();
106 unsigned int cpu;
49 int ret; 107 int ret;
50 108
51 ret = slow_work_register_user(THIS_MODULE); 109 fscache_object_max_active =
52 if (ret < 0) 110 clamp_val(nr_cpus,
53 goto error_slow_work; 111 fscache_object_max_active, WQ_UNBOUND_MAX_ACTIVE);
112
113 ret = -ENOMEM;
114 fscache_object_wq = alloc_workqueue("fscache_object", WQ_UNBOUND,
115 fscache_object_max_active);
116 if (!fscache_object_wq)
117 goto error_object_wq;
118
119 fscache_op_max_active =
120 clamp_val(fscache_object_max_active / 2,
121 fscache_op_max_active, WQ_UNBOUND_MAX_ACTIVE);
122
123 ret = -ENOMEM;
124 fscache_op_wq = alloc_workqueue("fscache_operation", WQ_UNBOUND,
125 fscache_op_max_active);
126 if (!fscache_op_wq)
127 goto error_op_wq;
128
129 for_each_possible_cpu(cpu)
130 init_waitqueue_head(&per_cpu(fscache_object_cong_wait, cpu));
54 131
55 ret = fscache_proc_init(); 132 ret = fscache_proc_init();
56 if (ret < 0) 133 if (ret < 0)
57 goto error_proc; 134 goto error_proc;
58 135
136#ifdef CONFIG_SYSCTL
137 ret = -ENOMEM;
138 fscache_sysctl_header = register_sysctl_table(fscache_sysctls_root);
139 if (!fscache_sysctl_header)
140 goto error_sysctl;
141#endif
142
59 fscache_cookie_jar = kmem_cache_create("fscache_cookie_jar", 143 fscache_cookie_jar = kmem_cache_create("fscache_cookie_jar",
60 sizeof(struct fscache_cookie), 144 sizeof(struct fscache_cookie),
61 0, 145 0,
@@ -78,10 +162,16 @@ static int __init fscache_init(void)
78error_kobj: 162error_kobj:
79 kmem_cache_destroy(fscache_cookie_jar); 163 kmem_cache_destroy(fscache_cookie_jar);
80error_cookie_jar: 164error_cookie_jar:
165#ifdef CONFIG_SYSCTL
166 unregister_sysctl_table(fscache_sysctl_header);
167error_sysctl:
168#endif
81 fscache_proc_cleanup(); 169 fscache_proc_cleanup();
82error_proc: 170error_proc:
83 slow_work_unregister_user(THIS_MODULE); 171 destroy_workqueue(fscache_op_wq);
84error_slow_work: 172error_op_wq:
173 destroy_workqueue(fscache_object_wq);
174error_object_wq:
85 return ret; 175 return ret;
86} 176}
87 177
@@ -96,8 +186,12 @@ static void __exit fscache_exit(void)
96 186
97 kobject_put(fscache_root); 187 kobject_put(fscache_root);
98 kmem_cache_destroy(fscache_cookie_jar); 188 kmem_cache_destroy(fscache_cookie_jar);
189#ifdef CONFIG_SYSCTL
190 unregister_sysctl_table(fscache_sysctl_header);
191#endif
99 fscache_proc_cleanup(); 192 fscache_proc_cleanup();
100 slow_work_unregister_user(THIS_MODULE); 193 destroy_workqueue(fscache_op_wq);
194 destroy_workqueue(fscache_object_wq);
101 printk(KERN_NOTICE "FS-Cache: Unloaded\n"); 195 printk(KERN_NOTICE "FS-Cache: Unloaded\n");
102} 196}
103 197
diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c
index 4a8eb31c5338..ebe29c581380 100644
--- a/fs/fscache/object-list.c
+++ b/fs/fscache/object-list.c
@@ -34,8 +34,8 @@ struct fscache_objlist_data {
34#define FSCACHE_OBJLIST_CONFIG_NOREADS 0x00000200 /* show objects without active reads */ 34#define FSCACHE_OBJLIST_CONFIG_NOREADS 0x00000200 /* show objects without active reads */
35#define FSCACHE_OBJLIST_CONFIG_EVENTS 0x00000400 /* show objects with events */ 35#define FSCACHE_OBJLIST_CONFIG_EVENTS 0x00000400 /* show objects with events */
36#define FSCACHE_OBJLIST_CONFIG_NOEVENTS 0x00000800 /* show objects without events */ 36#define FSCACHE_OBJLIST_CONFIG_NOEVENTS 0x00000800 /* show objects without events */
37#define FSCACHE_OBJLIST_CONFIG_WORK 0x00001000 /* show objects with slow work */ 37#define FSCACHE_OBJLIST_CONFIG_WORK 0x00001000 /* show objects with work */
38#define FSCACHE_OBJLIST_CONFIG_NOWORK 0x00002000 /* show objects without slow work */ 38#define FSCACHE_OBJLIST_CONFIG_NOWORK 0x00002000 /* show objects without work */
39 39
40 u8 buf[512]; /* key and aux data buffer */ 40 u8 buf[512]; /* key and aux data buffer */
41}; 41};
@@ -231,12 +231,11 @@ static int fscache_objlist_show(struct seq_file *m, void *v)
231 READS, NOREADS); 231 READS, NOREADS);
232 FILTER(obj->events & obj->event_mask, 232 FILTER(obj->events & obj->event_mask,
233 EVENTS, NOEVENTS); 233 EVENTS, NOEVENTS);
234 FILTER(obj->work.flags & ~(1UL << SLOW_WORK_VERY_SLOW), 234 FILTER(work_busy(&obj->work), WORK, NOWORK);
235 WORK, NOWORK);
236 } 235 }
237 236
238 seq_printf(m, 237 seq_printf(m,
239 "%8x %8x %s %5u %3u %3u %3u %2u %5u %2lx %2lx %1lx %1lx | ", 238 "%8x %8x %s %5u %3u %3u %3u %2u %5u %2lx %2lx %1lx %1x | ",
240 obj->debug_id, 239 obj->debug_id,
241 obj->parent ? obj->parent->debug_id : -1, 240 obj->parent ? obj->parent->debug_id : -1,
242 fscache_object_states_short[obj->state], 241 fscache_object_states_short[obj->state],
@@ -249,7 +248,7 @@ static int fscache_objlist_show(struct seq_file *m, void *v)
249 obj->event_mask & FSCACHE_OBJECT_EVENTS_MASK, 248 obj->event_mask & FSCACHE_OBJECT_EVENTS_MASK,
250 obj->events, 249 obj->events,
251 obj->flags, 250 obj->flags,
252 obj->work.flags); 251 work_busy(&obj->work));
253 252
254 no_cookie = true; 253 no_cookie = true;
255 keylen = auxlen = 0; 254 keylen = auxlen = 0;
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index 0b589a9b4ffc..b6b897c550ac 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -14,7 +14,6 @@
14 14
15#define FSCACHE_DEBUG_LEVEL COOKIE 15#define FSCACHE_DEBUG_LEVEL COOKIE
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/seq_file.h>
18#include "internal.h" 17#include "internal.h"
19 18
20const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = { 19const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = {
@@ -50,12 +49,8 @@ const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = {
50 [FSCACHE_OBJECT_DEAD] = "DEAD", 49 [FSCACHE_OBJECT_DEAD] = "DEAD",
51}; 50};
52 51
53static void fscache_object_slow_work_put_ref(struct slow_work *); 52static int fscache_get_object(struct fscache_object *);
54static int fscache_object_slow_work_get_ref(struct slow_work *); 53static void fscache_put_object(struct fscache_object *);
55static void fscache_object_slow_work_execute(struct slow_work *);
56#ifdef CONFIG_SLOW_WORK_DEBUG
57static void fscache_object_slow_work_desc(struct slow_work *, struct seq_file *);
58#endif
59static void fscache_initialise_object(struct fscache_object *); 54static void fscache_initialise_object(struct fscache_object *);
60static void fscache_lookup_object(struct fscache_object *); 55static void fscache_lookup_object(struct fscache_object *);
61static void fscache_object_available(struct fscache_object *); 56static void fscache_object_available(struct fscache_object *);
@@ -64,17 +59,6 @@ static void fscache_withdraw_object(struct fscache_object *);
64static void fscache_enqueue_dependents(struct fscache_object *); 59static void fscache_enqueue_dependents(struct fscache_object *);
65static void fscache_dequeue_object(struct fscache_object *); 60static void fscache_dequeue_object(struct fscache_object *);
66 61
67const struct slow_work_ops fscache_object_slow_work_ops = {
68 .owner = THIS_MODULE,
69 .get_ref = fscache_object_slow_work_get_ref,
70 .put_ref = fscache_object_slow_work_put_ref,
71 .execute = fscache_object_slow_work_execute,
72#ifdef CONFIG_SLOW_WORK_DEBUG
73 .desc = fscache_object_slow_work_desc,
74#endif
75};
76EXPORT_SYMBOL(fscache_object_slow_work_ops);
77
78/* 62/*
79 * we need to notify the parent when an op completes that we had outstanding 63 * we need to notify the parent when an op completes that we had outstanding
80 * upon it 64 * upon it
@@ -345,7 +329,7 @@ unsupported_event:
345/* 329/*
346 * execute an object 330 * execute an object
347 */ 331 */
348static void fscache_object_slow_work_execute(struct slow_work *work) 332void fscache_object_work_func(struct work_struct *work)
349{ 333{
350 struct fscache_object *object = 334 struct fscache_object *object =
351 container_of(work, struct fscache_object, work); 335 container_of(work, struct fscache_object, work);
@@ -359,23 +343,9 @@ static void fscache_object_slow_work_execute(struct slow_work *work)
359 if (object->events & object->event_mask) 343 if (object->events & object->event_mask)
360 fscache_enqueue_object(object); 344 fscache_enqueue_object(object);
361 clear_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); 345 clear_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events);
346 fscache_put_object(object);
362} 347}
363 348EXPORT_SYMBOL(fscache_object_work_func);
364/*
365 * describe an object for slow-work debugging
366 */
367#ifdef CONFIG_SLOW_WORK_DEBUG
368static void fscache_object_slow_work_desc(struct slow_work *work,
369 struct seq_file *m)
370{
371 struct fscache_object *object =
372 container_of(work, struct fscache_object, work);
373
374 seq_printf(m, "FSC: OBJ%x: %s",
375 object->debug_id,
376 fscache_object_states_short[object->state]);
377}
378#endif
379 349
380/* 350/*
381 * initialise an object 351 * initialise an object
@@ -393,7 +363,6 @@ static void fscache_initialise_object(struct fscache_object *object)
393 _enter(""); 363 _enter("");
394 ASSERT(object->cookie != NULL); 364 ASSERT(object->cookie != NULL);
395 ASSERT(object->cookie->parent != NULL); 365 ASSERT(object->cookie->parent != NULL);
396 ASSERT(list_empty(&object->work.link));
397 366
398 if (object->events & ((1 << FSCACHE_OBJECT_EV_ERROR) | 367 if (object->events & ((1 << FSCACHE_OBJECT_EV_ERROR) |
399 (1 << FSCACHE_OBJECT_EV_RELEASE) | 368 (1 << FSCACHE_OBJECT_EV_RELEASE) |
@@ -671,10 +640,8 @@ static void fscache_drop_object(struct fscache_object *object)
671 object->parent = NULL; 640 object->parent = NULL;
672 } 641 }
673 642
674 /* this just shifts the object release to the slow work processor */ 643 /* this just shifts the object release to the work processor */
675 fscache_stat(&fscache_n_cop_put_object); 644 fscache_put_object(object);
676 object->cache->ops->put_object(object);
677 fscache_stat_d(&fscache_n_cop_put_object);
678 645
679 _leave(""); 646 _leave("");
680} 647}
@@ -758,12 +725,10 @@ void fscache_withdrawing_object(struct fscache_cache *cache,
758} 725}
759 726
760/* 727/*
761 * allow the slow work item processor to get a ref on an object 728 * get a ref on an object
762 */ 729 */
763static int fscache_object_slow_work_get_ref(struct slow_work *work) 730static int fscache_get_object(struct fscache_object *object)
764{ 731{
765 struct fscache_object *object =
766 container_of(work, struct fscache_object, work);
767 int ret; 732 int ret;
768 733
769 fscache_stat(&fscache_n_cop_grab_object); 734 fscache_stat(&fscache_n_cop_grab_object);
@@ -773,13 +738,10 @@ static int fscache_object_slow_work_get_ref(struct slow_work *work)
773} 738}
774 739
775/* 740/*
776 * allow the slow work item processor to discard a ref on a work item 741 * discard a ref on a work item
777 */ 742 */
778static void fscache_object_slow_work_put_ref(struct slow_work *work) 743static void fscache_put_object(struct fscache_object *object)
779{ 744{
780 struct fscache_object *object =
781 container_of(work, struct fscache_object, work);
782
783 fscache_stat(&fscache_n_cop_put_object); 745 fscache_stat(&fscache_n_cop_put_object);
784 object->cache->ops->put_object(object); 746 object->cache->ops->put_object(object);
785 fscache_stat_d(&fscache_n_cop_put_object); 747 fscache_stat_d(&fscache_n_cop_put_object);
@@ -792,8 +754,48 @@ void fscache_enqueue_object(struct fscache_object *object)
792{ 754{
793 _enter("{OBJ%x}", object->debug_id); 755 _enter("{OBJ%x}", object->debug_id);
794 756
795 slow_work_enqueue(&object->work); 757 if (fscache_get_object(object) >= 0) {
758 wait_queue_head_t *cong_wq =
759 &get_cpu_var(fscache_object_cong_wait);
760
761 if (queue_work(fscache_object_wq, &object->work)) {
762 if (fscache_object_congested())
763 wake_up(cong_wq);
764 } else
765 fscache_put_object(object);
766
767 put_cpu_var(fscache_object_cong_wait);
768 }
769}
770
771/**
772 * fscache_object_sleep_till_congested - Sleep until object wq is congested
773 * @timeoutp: Scheduler sleep timeout
774 *
775 * Allow an object handler to sleep until the object workqueue is congested.
776 *
777 * The caller must set up a wake up event before calling this and must have set
778 * the appropriate sleep mode (such as TASK_UNINTERRUPTIBLE) and tested its own
779 * condition before calling this function as no test is made here.
780 *
781 * %true is returned if the object wq is congested, %false otherwise.
782 */
783bool fscache_object_sleep_till_congested(signed long *timeoutp)
784{
785 wait_queue_head_t *cong_wq = &__get_cpu_var(fscache_object_cong_wait);
786 DEFINE_WAIT(wait);
787
788 if (fscache_object_congested())
789 return true;
790
791 add_wait_queue_exclusive(cong_wq, &wait);
792 if (!fscache_object_congested())
793 *timeoutp = schedule_timeout(*timeoutp);
794 finish_wait(cong_wq, &wait);
795
796 return fscache_object_congested();
796} 797}
798EXPORT_SYMBOL_GPL(fscache_object_sleep_till_congested);
797 799
798/* 800/*
799 * enqueue the dependents of an object for metadata-type processing 801 * enqueue the dependents of an object for metadata-type processing
@@ -819,9 +821,7 @@ static void fscache_enqueue_dependents(struct fscache_object *object)
819 821
820 /* sort onto appropriate lists */ 822 /* sort onto appropriate lists */
821 fscache_enqueue_object(dep); 823 fscache_enqueue_object(dep);
822 fscache_stat(&fscache_n_cop_put_object); 824 fscache_put_object(dep);
823 dep->cache->ops->put_object(dep);
824 fscache_stat_d(&fscache_n_cop_put_object);
825 825
826 if (!list_empty(&object->dependents)) 826 if (!list_empty(&object->dependents))
827 cond_resched_lock(&object->lock); 827 cond_resched_lock(&object->lock);
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index f17cecafae44..b9f34eaede09 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -42,16 +42,12 @@ void fscache_enqueue_operation(struct fscache_operation *op)
42 42
43 fscache_stat(&fscache_n_op_enqueue); 43 fscache_stat(&fscache_n_op_enqueue);
44 switch (op->flags & FSCACHE_OP_TYPE) { 44 switch (op->flags & FSCACHE_OP_TYPE) {
45 case FSCACHE_OP_FAST: 45 case FSCACHE_OP_ASYNC:
46 _debug("queue fast"); 46 _debug("queue async");
47 atomic_inc(&op->usage); 47 atomic_inc(&op->usage);
48 if (!schedule_work(&op->fast_work)) 48 if (!queue_work(fscache_op_wq, &op->work))
49 fscache_put_operation(op); 49 fscache_put_operation(op);
50 break; 50 break;
51 case FSCACHE_OP_SLOW:
52 _debug("queue slow");
53 slow_work_enqueue(&op->slow_work);
54 break;
55 case FSCACHE_OP_MYTHREAD: 51 case FSCACHE_OP_MYTHREAD:
56 _debug("queue for caller's attention"); 52 _debug("queue for caller's attention");
57 break; 53 break;
@@ -455,36 +451,13 @@ void fscache_operation_gc(struct work_struct *work)
455} 451}
456 452
457/* 453/*
458 * allow the slow work item processor to get a ref on an operation 454 * execute an operation using fscache_op_wq to provide processing context -
459 */ 455 * the caller holds a ref to this object, so we don't need to hold one
460static int fscache_op_get_ref(struct slow_work *work)
461{
462 struct fscache_operation *op =
463 container_of(work, struct fscache_operation, slow_work);
464
465 atomic_inc(&op->usage);
466 return 0;
467}
468
469/*
470 * allow the slow work item processor to discard a ref on an operation
471 */
472static void fscache_op_put_ref(struct slow_work *work)
473{
474 struct fscache_operation *op =
475 container_of(work, struct fscache_operation, slow_work);
476
477 fscache_put_operation(op);
478}
479
480/*
481 * execute an operation using the slow thread pool to provide processing context
482 * - the caller holds a ref to this object, so we don't need to hold one
483 */ 456 */
484static void fscache_op_execute(struct slow_work *work) 457void fscache_op_work_func(struct work_struct *work)
485{ 458{
486 struct fscache_operation *op = 459 struct fscache_operation *op =
487 container_of(work, struct fscache_operation, slow_work); 460 container_of(work, struct fscache_operation, work);
488 unsigned long start; 461 unsigned long start;
489 462
490 _enter("{OBJ%x OP%x,%d}", 463 _enter("{OBJ%x OP%x,%d}",
@@ -494,31 +467,7 @@ static void fscache_op_execute(struct slow_work *work)
494 start = jiffies; 467 start = jiffies;
495 op->processor(op); 468 op->processor(op);
496 fscache_hist(fscache_ops_histogram, start); 469 fscache_hist(fscache_ops_histogram, start);
470 fscache_put_operation(op);
497 471
498 _leave(""); 472 _leave("");
499} 473}
500
501/*
502 * describe an operation for slow-work debugging
503 */
504#ifdef CONFIG_SLOW_WORK_DEBUG
505static void fscache_op_desc(struct slow_work *work, struct seq_file *m)
506{
507 struct fscache_operation *op =
508 container_of(work, struct fscache_operation, slow_work);
509
510 seq_printf(m, "FSC: OBJ%x OP%x: %s/%s fl=%lx",
511 op->object->debug_id, op->debug_id,
512 op->name, op->state, op->flags);
513}
514#endif
515
516const struct slow_work_ops fscache_op_slow_work_ops = {
517 .owner = THIS_MODULE,
518 .get_ref = fscache_op_get_ref,
519 .put_ref = fscache_op_put_ref,
520 .execute = fscache_op_execute,
521#ifdef CONFIG_SLOW_WORK_DEBUG
522 .desc = fscache_op_desc,
523#endif
524};
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 723b889fd219..41c441c2058d 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -105,7 +105,7 @@ bool __fscache_maybe_release_page(struct fscache_cookie *cookie,
105 105
106page_busy: 106page_busy:
107 /* we might want to wait here, but that could deadlock the allocator as 107 /* we might want to wait here, but that could deadlock the allocator as
108 * the slow-work threads writing to the cache may all end up sleeping 108 * the work threads writing to the cache may all end up sleeping
109 * on memory allocation */ 109 * on memory allocation */
110 fscache_stat(&fscache_n_store_vmscan_busy); 110 fscache_stat(&fscache_n_store_vmscan_busy);
111 return false; 111 return false;
@@ -188,9 +188,8 @@ int __fscache_attr_changed(struct fscache_cookie *cookie)
188 return -ENOMEM; 188 return -ENOMEM;
189 } 189 }
190 190
191 fscache_operation_init(op, NULL); 191 fscache_operation_init(op, fscache_attr_changed_op, NULL);
192 fscache_operation_init_slow(op, fscache_attr_changed_op); 192 op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE);
193 op->flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_EXCLUSIVE);
194 fscache_set_op_name(op, "Attr"); 193 fscache_set_op_name(op, "Attr");
195 194
196 spin_lock(&cookie->lock); 195 spin_lock(&cookie->lock);
@@ -218,24 +217,6 @@ nobufs:
218EXPORT_SYMBOL(__fscache_attr_changed); 217EXPORT_SYMBOL(__fscache_attr_changed);
219 218
220/* 219/*
221 * handle secondary execution given to a retrieval op on behalf of the
222 * cache
223 */
224static void fscache_retrieval_work(struct work_struct *work)
225{
226 struct fscache_retrieval *op =
227 container_of(work, struct fscache_retrieval, op.fast_work);
228 unsigned long start;
229
230 _enter("{OP%x}", op->op.debug_id);
231
232 start = jiffies;
233 op->op.processor(&op->op);
234 fscache_hist(fscache_ops_histogram, start);
235 fscache_put_operation(&op->op);
236}
237
238/*
239 * release a retrieval op reference 220 * release a retrieval op reference
240 */ 221 */
241static void fscache_release_retrieval_op(struct fscache_operation *_op) 222static void fscache_release_retrieval_op(struct fscache_operation *_op)
@@ -269,13 +250,12 @@ static struct fscache_retrieval *fscache_alloc_retrieval(
269 return NULL; 250 return NULL;
270 } 251 }
271 252
272 fscache_operation_init(&op->op, fscache_release_retrieval_op); 253 fscache_operation_init(&op->op, NULL, fscache_release_retrieval_op);
273 op->op.flags = FSCACHE_OP_MYTHREAD | (1 << FSCACHE_OP_WAITING); 254 op->op.flags = FSCACHE_OP_MYTHREAD | (1 << FSCACHE_OP_WAITING);
274 op->mapping = mapping; 255 op->mapping = mapping;
275 op->end_io_func = end_io_func; 256 op->end_io_func = end_io_func;
276 op->context = context; 257 op->context = context;
277 op->start_time = jiffies; 258 op->start_time = jiffies;
278 INIT_WORK(&op->op.fast_work, fscache_retrieval_work);
279 INIT_LIST_HEAD(&op->to_do); 259 INIT_LIST_HEAD(&op->to_do);
280 fscache_set_op_name(&op->op, "Retr"); 260 fscache_set_op_name(&op->op, "Retr");
281 return op; 261 return op;
@@ -795,9 +775,9 @@ int __fscache_write_page(struct fscache_cookie *cookie,
795 if (!op) 775 if (!op)
796 goto nomem; 776 goto nomem;
797 777
798 fscache_operation_init(&op->op, fscache_release_write_op); 778 fscache_operation_init(&op->op, fscache_write_op,
799 fscache_operation_init_slow(&op->op, fscache_write_op); 779 fscache_release_write_op);
800 op->op.flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_WAITING); 780 op->op.flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_WAITING);
801 fscache_set_op_name(&op->op, "Write1"); 781 fscache_set_op_name(&op->op, "Write1");
802 782
803 ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM); 783 ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM);
@@ -852,7 +832,7 @@ int __fscache_write_page(struct fscache_cookie *cookie,
852 fscache_stat(&fscache_n_store_ops); 832 fscache_stat(&fscache_n_store_ops);
853 fscache_stat(&fscache_n_stores_ok); 833 fscache_stat(&fscache_n_stores_ok);
854 834
855 /* the slow work queue now carries its own ref on the object */ 835 /* the work queue now carries its own ref on the object */
856 fscache_put_operation(&op->op); 836 fscache_put_operation(&op->op);
857 _leave(" = 0"); 837 _leave(" = 0");
858 return 0; 838 return 0;
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 9424796d6634..69ad053ffd78 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -239,7 +239,6 @@ static u64 fuse_get_unique(struct fuse_conn *fc)
239 239
240static void queue_request(struct fuse_conn *fc, struct fuse_req *req) 240static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
241{ 241{
242 req->in.h.unique = fuse_get_unique(fc);
243 req->in.h.len = sizeof(struct fuse_in_header) + 242 req->in.h.len = sizeof(struct fuse_in_header) +
244 len_args(req->in.numargs, (struct fuse_arg *) req->in.args); 243 len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
245 list_add_tail(&req->list, &fc->pending); 244 list_add_tail(&req->list, &fc->pending);
@@ -261,6 +260,7 @@ static void flush_bg_queue(struct fuse_conn *fc)
261 req = list_entry(fc->bg_queue.next, struct fuse_req, list); 260 req = list_entry(fc->bg_queue.next, struct fuse_req, list);
262 list_del(&req->list); 261 list_del(&req->list);
263 fc->active_background++; 262 fc->active_background++;
263 req->in.h.unique = fuse_get_unique(fc);
264 queue_request(fc, req); 264 queue_request(fc, req);
265 } 265 }
266} 266}
@@ -398,6 +398,7 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
398 else if (fc->conn_error) 398 else if (fc->conn_error)
399 req->out.h.error = -ECONNREFUSED; 399 req->out.h.error = -ECONNREFUSED;
400 else { 400 else {
401 req->in.h.unique = fuse_get_unique(fc);
401 queue_request(fc, req); 402 queue_request(fc, req);
402 /* acquire extra reference, since request is still needed 403 /* acquire extra reference, since request is still needed
403 after request_end() */ 404 after request_end() */
@@ -450,6 +451,23 @@ void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
450} 451}
451EXPORT_SYMBOL_GPL(fuse_request_send_background); 452EXPORT_SYMBOL_GPL(fuse_request_send_background);
452 453
454static int fuse_request_send_notify_reply(struct fuse_conn *fc,
455 struct fuse_req *req, u64 unique)
456{
457 int err = -ENODEV;
458
459 req->isreply = 0;
460 req->in.h.unique = unique;
461 spin_lock(&fc->lock);
462 if (fc->connected) {
463 queue_request(fc, req);
464 err = 0;
465 }
466 spin_unlock(&fc->lock);
467
468 return err;
469}
470
453/* 471/*
454 * Called under fc->lock 472 * Called under fc->lock
455 * 473 *
@@ -535,13 +553,13 @@ static void fuse_copy_finish(struct fuse_copy_state *cs)
535 if (!cs->write) { 553 if (!cs->write) {
536 buf->ops->unmap(cs->pipe, buf, cs->mapaddr); 554 buf->ops->unmap(cs->pipe, buf, cs->mapaddr);
537 } else { 555 } else {
538 kunmap_atomic(cs->mapaddr, KM_USER0); 556 kunmap(buf->page);
539 buf->len = PAGE_SIZE - cs->len; 557 buf->len = PAGE_SIZE - cs->len;
540 } 558 }
541 cs->currbuf = NULL; 559 cs->currbuf = NULL;
542 cs->mapaddr = NULL; 560 cs->mapaddr = NULL;
543 } else if (cs->mapaddr) { 561 } else if (cs->mapaddr) {
544 kunmap_atomic(cs->mapaddr, KM_USER0); 562 kunmap(cs->pg);
545 if (cs->write) { 563 if (cs->write) {
546 flush_dcache_page(cs->pg); 564 flush_dcache_page(cs->pg);
547 set_page_dirty_lock(cs->pg); 565 set_page_dirty_lock(cs->pg);
@@ -572,7 +590,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
572 590
573 BUG_ON(!cs->nr_segs); 591 BUG_ON(!cs->nr_segs);
574 cs->currbuf = buf; 592 cs->currbuf = buf;
575 cs->mapaddr = buf->ops->map(cs->pipe, buf, 1); 593 cs->mapaddr = buf->ops->map(cs->pipe, buf, 0);
576 cs->len = buf->len; 594 cs->len = buf->len;
577 cs->buf = cs->mapaddr + buf->offset; 595 cs->buf = cs->mapaddr + buf->offset;
578 cs->pipebufs++; 596 cs->pipebufs++;
@@ -592,7 +610,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
592 buf->len = 0; 610 buf->len = 0;
593 611
594 cs->currbuf = buf; 612 cs->currbuf = buf;
595 cs->mapaddr = kmap_atomic(page, KM_USER0); 613 cs->mapaddr = kmap(page);
596 cs->buf = cs->mapaddr; 614 cs->buf = cs->mapaddr;
597 cs->len = PAGE_SIZE; 615 cs->len = PAGE_SIZE;
598 cs->pipebufs++; 616 cs->pipebufs++;
@@ -611,7 +629,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
611 return err; 629 return err;
612 BUG_ON(err != 1); 630 BUG_ON(err != 1);
613 offset = cs->addr % PAGE_SIZE; 631 offset = cs->addr % PAGE_SIZE;
614 cs->mapaddr = kmap_atomic(cs->pg, KM_USER0); 632 cs->mapaddr = kmap(cs->pg);
615 cs->buf = cs->mapaddr + offset; 633 cs->buf = cs->mapaddr + offset;
616 cs->len = min(PAGE_SIZE - offset, cs->seglen); 634 cs->len = min(PAGE_SIZE - offset, cs->seglen);
617 cs->seglen -= cs->len; 635 cs->seglen -= cs->len;
@@ -1231,6 +1249,199 @@ err:
1231 return err; 1249 return err;
1232} 1250}
1233 1251
1252static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
1253 struct fuse_copy_state *cs)
1254{
1255 struct fuse_notify_store_out outarg;
1256 struct inode *inode;
1257 struct address_space *mapping;
1258 u64 nodeid;
1259 int err;
1260 pgoff_t index;
1261 unsigned int offset;
1262 unsigned int num;
1263 loff_t file_size;
1264 loff_t end;
1265
1266 err = -EINVAL;
1267 if (size < sizeof(outarg))
1268 goto out_finish;
1269
1270 err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1271 if (err)
1272 goto out_finish;
1273
1274 err = -EINVAL;
1275 if (size - sizeof(outarg) != outarg.size)
1276 goto out_finish;
1277
1278 nodeid = outarg.nodeid;
1279
1280 down_read(&fc->killsb);
1281
1282 err = -ENOENT;
1283 if (!fc->sb)
1284 goto out_up_killsb;
1285
1286 inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
1287 if (!inode)
1288 goto out_up_killsb;
1289
1290 mapping = inode->i_mapping;
1291 index = outarg.offset >> PAGE_CACHE_SHIFT;
1292 offset = outarg.offset & ~PAGE_CACHE_MASK;
1293 file_size = i_size_read(inode);
1294 end = outarg.offset + outarg.size;
1295 if (end > file_size) {
1296 file_size = end;
1297 fuse_write_update_size(inode, file_size);
1298 }
1299
1300 num = outarg.size;
1301 while (num) {
1302 struct page *page;
1303 unsigned int this_num;
1304
1305 err = -ENOMEM;
1306 page = find_or_create_page(mapping, index,
1307 mapping_gfp_mask(mapping));
1308 if (!page)
1309 goto out_iput;
1310
1311 this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
1312 err = fuse_copy_page(cs, &page, offset, this_num, 0);
1313 if (!err && offset == 0 && (num != 0 || file_size == end))
1314 SetPageUptodate(page);
1315 unlock_page(page);
1316 page_cache_release(page);
1317
1318 if (err)
1319 goto out_iput;
1320
1321 num -= this_num;
1322 offset = 0;
1323 index++;
1324 }
1325
1326 err = 0;
1327
1328out_iput:
1329 iput(inode);
1330out_up_killsb:
1331 up_read(&fc->killsb);
1332out_finish:
1333 fuse_copy_finish(cs);
1334 return err;
1335}
1336
1337static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
1338{
1339 int i;
1340
1341 for (i = 0; i < req->num_pages; i++) {
1342 struct page *page = req->pages[i];
1343 page_cache_release(page);
1344 }
1345}
1346
1347static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1348 struct fuse_notify_retrieve_out *outarg)
1349{
1350 int err;
1351 struct address_space *mapping = inode->i_mapping;
1352 struct fuse_req *req;
1353 pgoff_t index;
1354 loff_t file_size;
1355 unsigned int num;
1356 unsigned int offset;
1357 size_t total_len;
1358
1359 req = fuse_get_req(fc);
1360 if (IS_ERR(req))
1361 return PTR_ERR(req);
1362
1363 offset = outarg->offset & ~PAGE_CACHE_MASK;
1364
1365 req->in.h.opcode = FUSE_NOTIFY_REPLY;
1366 req->in.h.nodeid = outarg->nodeid;
1367 req->in.numargs = 2;
1368 req->in.argpages = 1;
1369 req->page_offset = offset;
1370 req->end = fuse_retrieve_end;
1371
1372 index = outarg->offset >> PAGE_CACHE_SHIFT;
1373 file_size = i_size_read(inode);
1374 num = outarg->size;
1375 if (outarg->offset > file_size)
1376 num = 0;
1377 else if (outarg->offset + num > file_size)
1378 num = file_size - outarg->offset;
1379
1380 while (num) {
1381 struct page *page;
1382 unsigned int this_num;
1383
1384 page = find_get_page(mapping, index);
1385 if (!page)
1386 break;
1387
1388 this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
1389 req->pages[req->num_pages] = page;
1390 req->num_pages++;
1391
1392 num -= this_num;
1393 total_len += this_num;
1394 }
1395 req->misc.retrieve_in.offset = outarg->offset;
1396 req->misc.retrieve_in.size = total_len;
1397 req->in.args[0].size = sizeof(req->misc.retrieve_in);
1398 req->in.args[0].value = &req->misc.retrieve_in;
1399 req->in.args[1].size = total_len;
1400
1401 err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
1402 if (err)
1403 fuse_retrieve_end(fc, req);
1404
1405 return err;
1406}
1407
1408static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
1409 struct fuse_copy_state *cs)
1410{
1411 struct fuse_notify_retrieve_out outarg;
1412 struct inode *inode;
1413 int err;
1414
1415 err = -EINVAL;
1416 if (size != sizeof(outarg))
1417 goto copy_finish;
1418
1419 err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1420 if (err)
1421 goto copy_finish;
1422
1423 fuse_copy_finish(cs);
1424
1425 down_read(&fc->killsb);
1426 err = -ENOENT;
1427 if (fc->sb) {
1428 u64 nodeid = outarg.nodeid;
1429
1430 inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
1431 if (inode) {
1432 err = fuse_retrieve(fc, inode, &outarg);
1433 iput(inode);
1434 }
1435 }
1436 up_read(&fc->killsb);
1437
1438 return err;
1439
1440copy_finish:
1441 fuse_copy_finish(cs);
1442 return err;
1443}
1444
1234static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, 1445static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
1235 unsigned int size, struct fuse_copy_state *cs) 1446 unsigned int size, struct fuse_copy_state *cs)
1236{ 1447{
@@ -1244,6 +1455,12 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
1244 case FUSE_NOTIFY_INVAL_ENTRY: 1455 case FUSE_NOTIFY_INVAL_ENTRY:
1245 return fuse_notify_inval_entry(fc, size, cs); 1456 return fuse_notify_inval_entry(fc, size, cs);
1246 1457
1458 case FUSE_NOTIFY_STORE:
1459 return fuse_notify_store(fc, size, cs);
1460
1461 case FUSE_NOTIFY_RETRIEVE:
1462 return fuse_notify_retrieve(fc, size, cs);
1463
1247 default: 1464 default:
1248 fuse_copy_finish(cs); 1465 fuse_copy_finish(cs);
1249 return -EINVAL; 1466 return -EINVAL;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 3cdc5f78a406..c9627c95482d 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1016,7 +1016,7 @@ static int fuse_permission(struct inode *inode, int mask)
1016 exist. So if permissions are revoked this won't be 1016 exist. So if permissions are revoked this won't be
1017 noticed immediately, only after the attribute 1017 noticed immediately, only after the attribute
1018 timeout has expired */ 1018 timeout has expired */
1019 } else if (mask & MAY_ACCESS) { 1019 } else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1020 err = fuse_access(inode, mask); 1020 err = fuse_access(inode, mask);
1021 } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) { 1021 } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
1022 if (!(inode->i_mode & S_IXUGO)) { 1022 if (!(inode->i_mode & S_IXUGO)) {
@@ -1270,21 +1270,18 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1270 if (!fuse_allow_task(fc, current)) 1270 if (!fuse_allow_task(fc, current))
1271 return -EACCES; 1271 return -EACCES;
1272 1272
1273 if (fc->flags & FUSE_DEFAULT_PERMISSIONS) { 1273 if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
1274 err = inode_change_ok(inode, attr); 1274 attr->ia_valid |= ATTR_FORCE;
1275 if (err) 1275
1276 return err; 1276 err = inode_change_ok(inode, attr);
1277 } 1277 if (err)
1278 return err;
1278 1279
1279 if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc) 1280 if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc)
1280 return 0; 1281 return 0;
1281 1282
1282 if (attr->ia_valid & ATTR_SIZE) { 1283 if (attr->ia_valid & ATTR_SIZE)
1283 err = inode_newsize_ok(inode, attr->ia_size);
1284 if (err)
1285 return err;
1286 is_truncate = true; 1284 is_truncate = true;
1287 }
1288 1285
1289 req = fuse_get_req(fc); 1286 req = fuse_get_req(fc);
1290 if (IS_ERR(req)) 1287 if (IS_ERR(req))
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index ada0adeb3bb5..147c1f71bdb9 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -706,7 +706,7 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping,
706 return 0; 706 return 0;
707} 707}
708 708
709static void fuse_write_update_size(struct inode *inode, loff_t pos) 709void fuse_write_update_size(struct inode *inode, loff_t pos)
710{ 710{
711 struct fuse_conn *fc = get_fuse_conn(inode); 711 struct fuse_conn *fc = get_fuse_conn(inode);
712 struct fuse_inode *fi = get_fuse_inode(inode); 712 struct fuse_inode *fi = get_fuse_inode(inode);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 8f309f04064e..57d4a3a0f102 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -272,6 +272,7 @@ struct fuse_req {
272 struct fuse_write_in in; 272 struct fuse_write_in in;
273 struct fuse_write_out out; 273 struct fuse_write_out out;
274 } write; 274 } write;
275 struct fuse_notify_retrieve_in retrieve_in;
275 struct fuse_lk_in lk_in; 276 struct fuse_lk_in lk_in;
276 } misc; 277 } misc;
277 278
@@ -748,4 +749,6 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
748unsigned fuse_file_poll(struct file *file, poll_table *wait); 749unsigned fuse_file_poll(struct file *file, poll_table *wait);
749int fuse_dev_release(struct inode *inode, struct file *file); 750int fuse_dev_release(struct inode *inode, struct file *file);
750 751
752void fuse_write_update_size(struct inode *inode, loff_t pos);
753
751#endif /* _FS_FUSE_I_H */ 754#endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index ec14d19ce501..da9e6e11374c 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -122,8 +122,10 @@ void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
122 fuse_request_send_noreply(fc, req); 122 fuse_request_send_noreply(fc, req);
123} 123}
124 124
125static void fuse_clear_inode(struct inode *inode) 125static void fuse_evict_inode(struct inode *inode)
126{ 126{
127 truncate_inode_pages(&inode->i_data, 0);
128 end_writeback(inode);
127 if (inode->i_sb->s_flags & MS_ACTIVE) { 129 if (inode->i_sb->s_flags & MS_ACTIVE) {
128 struct fuse_conn *fc = get_fuse_conn(inode); 130 struct fuse_conn *fc = get_fuse_conn(inode);
129 struct fuse_inode *fi = get_fuse_inode(inode); 131 struct fuse_inode *fi = get_fuse_inode(inode);
@@ -736,7 +738,7 @@ static const struct export_operations fuse_export_operations = {
736static const struct super_operations fuse_super_operations = { 738static const struct super_operations fuse_super_operations = {
737 .alloc_inode = fuse_alloc_inode, 739 .alloc_inode = fuse_alloc_inode,
738 .destroy_inode = fuse_destroy_inode, 740 .destroy_inode = fuse_destroy_inode,
739 .clear_inode = fuse_clear_inode, 741 .evict_inode = fuse_evict_inode,
740 .drop_inode = generic_delete_inode, 742 .drop_inode = generic_delete_inode,
741 .remount_fs = fuse_remount_fs, 743 .remount_fs = fuse_remount_fs,
742 .put_super = fuse_put_super, 744 .put_super = fuse_put_super,
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index a47b43107112..cc9665522148 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -7,7 +7,6 @@ config GFS2_FS
7 select IP_SCTP if DLM_SCTP 7 select IP_SCTP if DLM_SCTP
8 select FS_POSIX_ACL 8 select FS_POSIX_ACL
9 select CRC32 9 select CRC32
10 select SLOW_WORK
11 select QUOTACTL 10 select QUOTACTL
12 help 11 help
13 A cluster filesystem. 12 A cluster filesystem.
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 9f8b52500d63..194fe16d8418 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -136,10 +136,7 @@ static int gfs2_writeback_writepage(struct page *page,
136 if (ret <= 0) 136 if (ret <= 0)
137 return ret; 137 return ret;
138 138
139 ret = mpage_writepage(page, gfs2_get_block_noalloc, wbc); 139 return nobh_writepage(page, gfs2_get_block_noalloc, wbc);
140 if (ret == -EAGAIN)
141 ret = block_write_full_page(page, gfs2_get_block_noalloc, wbc);
142 return ret;
143} 140}
144 141
145/** 142/**
@@ -637,9 +634,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
637 } 634 }
638 } 635 }
639 636
640 error = gfs2_write_alloc_required(ip, pos, len, &alloc_required); 637 alloc_required = gfs2_write_alloc_required(ip, pos, len);
641 if (error)
642 goto out_unlock;
643 638
644 if (alloc_required || gfs2_is_jdata(ip)) 639 if (alloc_required || gfs2_is_jdata(ip))
645 gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); 640 gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks);
@@ -702,12 +697,12 @@ out:
702 page_cache_release(page); 697 page_cache_release(page);
703 698
704 /* 699 /*
705 * XXX(hch): the call below should probably be replaced with 700 * XXX(truncate): the call below should probably be replaced with
706 * a call to the gfs2-specific truncate blocks helper to actually 701 * a call to the gfs2-specific truncate blocks helper to actually
707 * release disk blocks.. 702 * release disk blocks..
708 */ 703 */
709 if (pos + len > ip->i_inode.i_size) 704 if (pos + len > ip->i_inode.i_size)
710 simple_setsize(&ip->i_inode, ip->i_inode.i_size); 705 truncate_setsize(&ip->i_inode, ip->i_inode.i_size);
711out_endtrans: 706out_endtrans:
712 gfs2_trans_end(sdp); 707 gfs2_trans_end(sdp);
713out_trans_fail: 708out_trans_fail:
@@ -1047,9 +1042,9 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
1047 if (rv != 1) 1042 if (rv != 1)
1048 goto out; /* dio not valid, fall back to buffered i/o */ 1043 goto out; /* dio not valid, fall back to buffered i/o */
1049 1044
1050 rv = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev, 1045 rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
1051 iov, offset, nr_segs, 1046 offset, nr_segs, gfs2_get_block_direct,
1052 gfs2_get_block_direct, NULL); 1047 NULL, NULL, 0);
1053out: 1048out:
1054 gfs2_glock_dq_m(1, &gh); 1049 gfs2_glock_dq_m(1, &gh);
1055 gfs2_holder_uninit(&gh); 1050 gfs2_holder_uninit(&gh);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 84da64b551b2..6f482809d1a3 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1040,7 +1040,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
1040 goto out; 1040 goto out;
1041 1041
1042 if (gfs2_is_stuffed(ip)) { 1042 if (gfs2_is_stuffed(ip)) {
1043 u64 dsize = size + sizeof(struct gfs2_inode); 1043 u64 dsize = size + sizeof(struct gfs2_dinode);
1044 ip->i_disksize = size; 1044 ip->i_disksize = size;
1045 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1045 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1046 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1046 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
@@ -1244,13 +1244,12 @@ int gfs2_file_dealloc(struct gfs2_inode *ip)
1244 * @ip: the file being written to 1244 * @ip: the file being written to
1245 * @offset: the offset to write to 1245 * @offset: the offset to write to
1246 * @len: the number of bytes being written 1246 * @len: the number of bytes being written
1247 * @alloc_required: set to 1 if an alloc is required, 0 otherwise
1248 * 1247 *
1249 * Returns: errno 1248 * Returns: 1 if an alloc is required, 0 otherwise
1250 */ 1249 */
1251 1250
1252int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, 1251int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
1253 unsigned int len, int *alloc_required) 1252 unsigned int len)
1254{ 1253{
1255 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1254 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1256 struct buffer_head bh; 1255 struct buffer_head bh;
@@ -1258,26 +1257,23 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
1258 u64 lblock, lblock_stop, size; 1257 u64 lblock, lblock_stop, size;
1259 u64 end_of_file; 1258 u64 end_of_file;
1260 1259
1261 *alloc_required = 0;
1262
1263 if (!len) 1260 if (!len)
1264 return 0; 1261 return 0;
1265 1262
1266 if (gfs2_is_stuffed(ip)) { 1263 if (gfs2_is_stuffed(ip)) {
1267 if (offset + len > 1264 if (offset + len >
1268 sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) 1265 sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
1269 *alloc_required = 1; 1266 return 1;
1270 return 0; 1267 return 0;
1271 } 1268 }
1272 1269
1273 *alloc_required = 1;
1274 shift = sdp->sd_sb.sb_bsize_shift; 1270 shift = sdp->sd_sb.sb_bsize_shift;
1275 BUG_ON(gfs2_is_dir(ip)); 1271 BUG_ON(gfs2_is_dir(ip));
1276 end_of_file = (ip->i_disksize + sdp->sd_sb.sb_bsize - 1) >> shift; 1272 end_of_file = (ip->i_disksize + sdp->sd_sb.sb_bsize - 1) >> shift;
1277 lblock = offset >> shift; 1273 lblock = offset >> shift;
1278 lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; 1274 lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
1279 if (lblock_stop > end_of_file) 1275 if (lblock_stop > end_of_file)
1280 return 0; 1276 return 1;
1281 1277
1282 size = (lblock_stop - lblock) << shift; 1278 size = (lblock_stop - lblock) << shift;
1283 do { 1279 do {
@@ -1285,12 +1281,11 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
1285 bh.b_size = size; 1281 bh.b_size = size;
1286 gfs2_block_map(&ip->i_inode, lblock, &bh, 0); 1282 gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
1287 if (!buffer_mapped(&bh)) 1283 if (!buffer_mapped(&bh))
1288 return 0; 1284 return 1;
1289 size -= bh.b_size; 1285 size -= bh.b_size;
1290 lblock += (bh.b_size >> ip->i_inode.i_blkbits); 1286 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
1291 } while(size > 0); 1287 } while(size > 0);
1292 1288
1293 *alloc_required = 0;
1294 return 0; 1289 return 0;
1295} 1290}
1296 1291
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
index c983177e05ac..a20a5213135a 100644
--- a/fs/gfs2/bmap.h
+++ b/fs/gfs2/bmap.h
@@ -52,6 +52,6 @@ int gfs2_truncatei(struct gfs2_inode *ip, u64 size);
52int gfs2_truncatei_resume(struct gfs2_inode *ip); 52int gfs2_truncatei_resume(struct gfs2_inode *ip);
53int gfs2_file_dealloc(struct gfs2_inode *ip); 53int gfs2_file_dealloc(struct gfs2_inode *ip);
54int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, 54int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
55 unsigned int len, int *alloc_required); 55 unsigned int len);
56 56
57#endif /* __BMAP_DOT_H__ */ 57#endif /* __BMAP_DOT_H__ */
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 26ca3361a8bc..b9dd88a78dd4 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -955,7 +955,12 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
955 /* Change the pointers. 955 /* Change the pointers.
956 Don't bother distinguishing stuffed from non-stuffed. 956 Don't bother distinguishing stuffed from non-stuffed.
957 This code is complicated enough already. */ 957 This code is complicated enough already. */
958 lp = kmalloc(half_len * sizeof(__be64), GFP_NOFS | __GFP_NOFAIL); 958 lp = kmalloc(half_len * sizeof(__be64), GFP_NOFS);
959 if (!lp) {
960 error = -ENOMEM;
961 goto fail_brelse;
962 }
963
959 /* Change the pointers */ 964 /* Change the pointers */
960 for (x = 0; x < half_len; x++) 965 for (x = 0; x < half_len; x++)
961 lp[x] = cpu_to_be64(bn); 966 lp[x] = cpu_to_be64(bn);
@@ -1063,7 +1068,9 @@ static int dir_double_exhash(struct gfs2_inode *dip)
1063 1068
1064 /* Allocate both the "from" and "to" buffers in one big chunk */ 1069 /* Allocate both the "from" and "to" buffers in one big chunk */
1065 1070
1066 buf = kcalloc(3, sdp->sd_hash_bsize, GFP_NOFS | __GFP_NOFAIL); 1071 buf = kcalloc(3, sdp->sd_hash_bsize, GFP_NOFS);
1072 if (!buf)
1073 return -ENOMEM;
1067 1074
1068 for (block = dip->i_disksize >> sdp->sd_hash_bsize_shift; block--;) { 1075 for (block = dip->i_disksize >> sdp->sd_hash_bsize_shift; block--;) {
1069 error = gfs2_dir_read_data(dip, (char *)buf, 1076 error = gfs2_dir_read_data(dip, (char *)buf,
@@ -1231,6 +1238,25 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset,
1231 return 0; 1238 return 0;
1232} 1239}
1233 1240
1241static void *gfs2_alloc_sort_buffer(unsigned size)
1242{
1243 void *ptr = NULL;
1244
1245 if (size < KMALLOC_MAX_SIZE)
1246 ptr = kmalloc(size, GFP_NOFS | __GFP_NOWARN);
1247 if (!ptr)
1248 ptr = __vmalloc(size, GFP_NOFS, PAGE_KERNEL);
1249 return ptr;
1250}
1251
/*
 * Free a buffer obtained from gfs2_alloc_sort_buffer(), picking vfree() or
 * kfree() according to which allocator the address belongs to.
 */
static void gfs2_free_sort_buffer(void *ptr)
{
	if (!is_vmalloc_addr(ptr)) {
		kfree(ptr);
		return;
	}

	vfree(ptr);
}
1259
1234static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, 1260static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
1235 filldir_t filldir, int *copied, unsigned *depth, 1261 filldir_t filldir, int *copied, unsigned *depth,
1236 u64 leaf_no) 1262 u64 leaf_no)
@@ -1271,7 +1297,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
1271 * 99 is the maximum number of entries that can fit in a single 1297 * 99 is the maximum number of entries that can fit in a single
1272 * leaf block. 1298 * leaf block.
1273 */ 1299 */
1274 larr = vmalloc((leaves + entries + 99) * sizeof(void *)); 1300 larr = gfs2_alloc_sort_buffer((leaves + entries + 99) * sizeof(void *));
1275 if (!larr) 1301 if (!larr)
1276 goto out; 1302 goto out;
1277 darr = (const struct gfs2_dirent **)(larr + leaves); 1303 darr = (const struct gfs2_dirent **)(larr + leaves);
@@ -1282,7 +1308,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
1282 do { 1308 do {
1283 error = get_leaf(ip, lfn, &bh); 1309 error = get_leaf(ip, lfn, &bh);
1284 if (error) 1310 if (error)
1285 goto out_kfree; 1311 goto out_free;
1286 lf = (struct gfs2_leaf *)bh->b_data; 1312 lf = (struct gfs2_leaf *)bh->b_data;
1287 lfn = be64_to_cpu(lf->lf_next); 1313 lfn = be64_to_cpu(lf->lf_next);
1288 if (lf->lf_entries) { 1314 if (lf->lf_entries) {
@@ -1291,7 +1317,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
1291 gfs2_dirent_gather, NULL, &g); 1317 gfs2_dirent_gather, NULL, &g);
1292 error = PTR_ERR(dent); 1318 error = PTR_ERR(dent);
1293 if (IS_ERR(dent)) 1319 if (IS_ERR(dent))
1294 goto out_kfree; 1320 goto out_free;
1295 if (entries2 != g.offset) { 1321 if (entries2 != g.offset) {
1296 fs_warn(sdp, "Number of entries corrupt in dir " 1322 fs_warn(sdp, "Number of entries corrupt in dir "
1297 "leaf %llu, entries2 (%u) != " 1323 "leaf %llu, entries2 (%u) != "
@@ -1300,7 +1326,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
1300 entries2, g.offset); 1326 entries2, g.offset);
1301 1327
1302 error = -EIO; 1328 error = -EIO;
1303 goto out_kfree; 1329 goto out_free;
1304 } 1330 }
1305 error = 0; 1331 error = 0;
1306 larr[leaf++] = bh; 1332 larr[leaf++] = bh;
@@ -1312,10 +1338,10 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
1312 BUG_ON(entries2 != entries); 1338 BUG_ON(entries2 != entries);
1313 error = do_filldir_main(ip, offset, opaque, filldir, darr, 1339 error = do_filldir_main(ip, offset, opaque, filldir, darr,
1314 entries, copied); 1340 entries, copied);
1315out_kfree: 1341out_free:
1316 for(i = 0; i < leaf; i++) 1342 for(i = 0; i < leaf; i++)
1317 brelse(larr[i]); 1343 brelse(larr[i]);
1318 vfree(larr); 1344 gfs2_free_sort_buffer(larr);
1319out: 1345out:
1320 return error; 1346 return error;
1321} 1347}
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index ed9a94f0ef15..4edd662c8232 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -351,7 +351,6 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
351 unsigned long last_index; 351 unsigned long last_index;
352 u64 pos = page->index << PAGE_CACHE_SHIFT; 352 u64 pos = page->index << PAGE_CACHE_SHIFT;
353 unsigned int data_blocks, ind_blocks, rblocks; 353 unsigned int data_blocks, ind_blocks, rblocks;
354 int alloc_required = 0;
355 struct gfs2_holder gh; 354 struct gfs2_holder gh;
356 struct gfs2_alloc *al; 355 struct gfs2_alloc *al;
357 int ret; 356 int ret;
@@ -364,8 +363,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
364 set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); 363 set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
365 set_bit(GIF_SW_PAGED, &ip->i_flags); 364 set_bit(GIF_SW_PAGED, &ip->i_flags);
366 365
367 ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required); 366 if (!gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE))
368 if (ret || !alloc_required)
369 goto out_unlock; 367 goto out_unlock;
370 ret = -ENOMEM; 368 ret = -ENOMEM;
371 al = gfs2_alloc_get(ip); 369 al = gfs2_alloc_get(ip);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 0898f3ec8212..9adf8f924e08 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -328,6 +328,30 @@ static void gfs2_holder_wake(struct gfs2_holder *gh)
328} 328}
329 329
330/** 330/**
331 * do_error - Something unexpected has happened during a lock request
332 *
333 */
334
335static inline void do_error(struct gfs2_glock *gl, const int ret)
336{
337 struct gfs2_holder *gh, *tmp;
338
339 list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
340 if (test_bit(HIF_HOLDER, &gh->gh_iflags))
341 continue;
342 if (ret & LM_OUT_ERROR)
343 gh->gh_error = -EIO;
344 else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))
345 gh->gh_error = GLR_TRYFAILED;
346 else
347 continue;
348 list_del_init(&gh->gh_list);
349 trace_gfs2_glock_queue(gh, 0);
350 gfs2_holder_wake(gh);
351 }
352}
353
354/**
331 * do_promote - promote as many requests as possible on the current queue 355 * do_promote - promote as many requests as possible on the current queue
332 * @gl: The glock 356 * @gl: The glock
333 * 357 *
@@ -375,36 +399,13 @@ restart:
375 } 399 }
376 if (gh->gh_list.prev == &gl->gl_holders) 400 if (gh->gh_list.prev == &gl->gl_holders)
377 return 1; 401 return 1;
402 do_error(gl, 0);
378 break; 403 break;
379 } 404 }
380 return 0; 405 return 0;
381} 406}
382 407
383/** 408/**
384 * do_error - Something unexpected has happened during a lock request
385 *
386 */
387
388static inline void do_error(struct gfs2_glock *gl, const int ret)
389{
390 struct gfs2_holder *gh, *tmp;
391
392 list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
393 if (test_bit(HIF_HOLDER, &gh->gh_iflags))
394 continue;
395 if (ret & LM_OUT_ERROR)
396 gh->gh_error = -EIO;
397 else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))
398 gh->gh_error = GLR_TRYFAILED;
399 else
400 continue;
401 list_del_init(&gh->gh_list);
402 trace_gfs2_glock_queue(gh, 0);
403 gfs2_holder_wake(gh);
404 }
405}
406
407/**
408 * find_first_waiter - find the first gh that's waiting for the glock 409 * find_first_waiter - find the first gh that's waiting for the glock
409 * @gl: the glock 410 * @gl: the glock
410 */ 411 */
@@ -706,18 +707,8 @@ static void glock_work_func(struct work_struct *work)
706{ 707{
707 unsigned long delay = 0; 708 unsigned long delay = 0;
708 struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); 709 struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work);
709 struct gfs2_holder *gh;
710 int drop_ref = 0; 710 int drop_ref = 0;
711 711
712 if (unlikely(test_bit(GLF_FROZEN, &gl->gl_flags))) {
713 spin_lock(&gl->gl_spin);
714 gh = find_first_waiter(gl);
715 if (gh && (gh->gh_flags & LM_FLAG_NOEXP) &&
716 test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))
717 set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
718 spin_unlock(&gl->gl_spin);
719 }
720
721 if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) { 712 if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) {
722 finish_xmote(gl, gl->gl_reply); 713 finish_xmote(gl, gl->gl_reply);
723 drop_ref = 1; 714 drop_ref = 1;
@@ -1072,6 +1063,9 @@ int gfs2_glock_nq(struct gfs2_holder *gh)
1072 1063
1073 spin_lock(&gl->gl_spin); 1064 spin_lock(&gl->gl_spin);
1074 add_to_queue(gh); 1065 add_to_queue(gh);
1066 if ((LM_FLAG_NOEXP & gh->gh_flags) &&
1067 test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))
1068 set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
1075 run_queue(gl, 1); 1069 run_queue(gl, 1);
1076 spin_unlock(&gl->gl_spin); 1070 spin_unlock(&gl->gl_spin);
1077 1071
@@ -1329,6 +1323,36 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
1329} 1323}
1330 1324
1331/** 1325/**
1326 * gfs2_should_freeze - Figure out if glock should be frozen
1327 * @gl: The glock in question
1328 *
1329 * Glocks are not frozen if (a) the result of the dlm operation is
1330 * an error, (b) the locking operation was an unlock operation or
1331 * (c) if there is a "noexp" flagged request anywhere in the queue
1332 *
1333 * Returns: 1 if freezing should occur, 0 otherwise
1334 */
1335
1336static int gfs2_should_freeze(const struct gfs2_glock *gl)
1337{
1338 const struct gfs2_holder *gh;
1339
1340 if (gl->gl_reply & ~LM_OUT_ST_MASK)
1341 return 0;
1342 if (gl->gl_target == LM_ST_UNLOCKED)
1343 return 0;
1344
1345 list_for_each_entry(gh, &gl->gl_holders, gh_list) {
1346 if (test_bit(HIF_HOLDER, &gh->gh_iflags))
1347 continue;
1348 if (LM_FLAG_NOEXP & gh->gh_flags)
1349 return 0;
1350 }
1351
1352 return 1;
1353}
1354
1355/**
1332 * gfs2_glock_complete - Callback used by locking 1356 * gfs2_glock_complete - Callback used by locking
1333 * @gl: Pointer to the glock 1357 * @gl: Pointer to the glock
1334 * @ret: The return value from the dlm 1358 * @ret: The return value from the dlm
@@ -1338,18 +1362,17 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
1338void gfs2_glock_complete(struct gfs2_glock *gl, int ret) 1362void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
1339{ 1363{
1340 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; 1364 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
1365
1341 gl->gl_reply = ret; 1366 gl->gl_reply = ret;
1367
1342 if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) { 1368 if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) {
1343 struct gfs2_holder *gh;
1344 spin_lock(&gl->gl_spin); 1369 spin_lock(&gl->gl_spin);
1345 gh = find_first_waiter(gl); 1370 if (gfs2_should_freeze(gl)) {
1346 if ((!(gh && (gh->gh_flags & LM_FLAG_NOEXP)) &&
1347 (gl->gl_target != LM_ST_UNLOCKED)) ||
1348 ((ret & ~LM_OUT_ST_MASK) != 0))
1349 set_bit(GLF_FROZEN, &gl->gl_flags); 1371 set_bit(GLF_FROZEN, &gl->gl_flags);
1350 spin_unlock(&gl->gl_spin); 1372 spin_unlock(&gl->gl_spin);
1351 if (test_bit(GLF_FROZEN, &gl->gl_flags))
1352 return; 1373 return;
1374 }
1375 spin_unlock(&gl->gl_spin);
1353 } 1376 }
1354 set_bit(GLF_REPLY_PENDING, &gl->gl_flags); 1377 set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
1355 gfs2_glock_hold(gl); 1378 gfs2_glock_hold(gl);
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index b5d7363b22da..fdbf4b366fa5 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -12,7 +12,6 @@
12 12
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/workqueue.h> 14#include <linux/workqueue.h>
15#include <linux/slow-work.h>
16#include <linux/dlm.h> 15#include <linux/dlm.h>
17#include <linux/buffer_head.h> 16#include <linux/buffer_head.h>
18 17
@@ -383,7 +382,7 @@ struct gfs2_journal_extent {
383struct gfs2_jdesc { 382struct gfs2_jdesc {
384 struct list_head jd_list; 383 struct list_head jd_list;
385 struct list_head extent_list; 384 struct list_head extent_list;
386 struct slow_work jd_work; 385 struct work_struct jd_work;
387 struct inode *jd_inode; 386 struct inode *jd_inode;
388 unsigned long jd_flags; 387 unsigned long jd_flags;
389#define JDF_RECOVERY 1 388#define JDF_RECOVERY 1
@@ -460,6 +459,7 @@ enum {
460 SDF_NOBARRIERS = 3, 459 SDF_NOBARRIERS = 3,
461 SDF_NORECOVERY = 4, 460 SDF_NORECOVERY = 4,
462 SDF_DEMOTE = 5, 461 SDF_DEMOTE = 5,
462 SDF_NOJOURNALID = 6,
463}; 463};
464 464
465#define GFS2_FSNAME_LEN 256 465#define GFS2_FSNAME_LEN 256
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index f03afd9c44bc..08140f185a37 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -84,7 +84,7 @@ static int iget_skip_test(struct inode *inode, void *opaque)
84 struct gfs2_skip_data *data = opaque; 84 struct gfs2_skip_data *data = opaque;
85 85
86 if (ip->i_no_addr == data->no_addr) { 86 if (ip->i_no_addr == data->no_addr) {
87 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)){ 87 if (inode->i_state & (I_FREEING|I_WILL_FREE)){
88 data->skipped = 1; 88 data->skipped = 1;
89 return 0; 89 return 0;
90 } 90 }
@@ -991,18 +991,29 @@ fail:
991 991
992static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) 992static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
993{ 993{
994 struct inode *inode = &ip->i_inode;
994 struct buffer_head *dibh; 995 struct buffer_head *dibh;
995 int error; 996 int error;
996 997
997 error = gfs2_meta_inode_buffer(ip, &dibh); 998 error = gfs2_meta_inode_buffer(ip, &dibh);
998 if (!error) { 999 if (error)
999 error = inode_setattr(&ip->i_inode, attr); 1000 return error;
1000 gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error); 1001
1001 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1002 if ((attr->ia_valid & ATTR_SIZE) &&
1002 gfs2_dinode_out(ip, dibh->b_data); 1003 attr->ia_size != i_size_read(inode)) {
1003 brelse(dibh); 1004 error = vmtruncate(inode, attr->ia_size);
1005 if (error)
1006 return error;
1004 } 1007 }
1005 return error; 1008
1009 setattr_copy(inode, attr);
1010 mark_inode_dirty(inode);
1011
1012 gfs2_assert_warn(GFS2_SB(inode), !error);
1013 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1014 gfs2_dinode_out(ip, dibh->b_data);
1015 brelse(dibh);
1016 return 0;
1006} 1017}
1007 1018
1008/** 1019/**
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index fb2a5f93b7c3..b1e9630eb46a 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -15,7 +15,6 @@
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/gfs2_ondisk.h> 16#include <linux/gfs2_ondisk.h>
17#include <asm/atomic.h> 17#include <asm/atomic.h>
18#include <linux/slow-work.h>
19 18
20#include "gfs2.h" 19#include "gfs2.h"
21#include "incore.h" 20#include "incore.h"
@@ -24,6 +23,7 @@
24#include "util.h" 23#include "util.h"
25#include "glock.h" 24#include "glock.h"
26#include "quota.h" 25#include "quota.h"
26#include "recovery.h"
27 27
28static struct shrinker qd_shrinker = { 28static struct shrinker qd_shrinker = {
29 .shrink = gfs2_shrink_qd_memory, 29 .shrink = gfs2_shrink_qd_memory,
@@ -138,9 +138,11 @@ static int __init init_gfs2_fs(void)
138 if (error) 138 if (error)
139 goto fail_unregister; 139 goto fail_unregister;
140 140
141 error = slow_work_register_user(THIS_MODULE); 141 error = -ENOMEM;
142 if (error) 142 gfs_recovery_wq = alloc_workqueue("gfs_recovery",
143 goto fail_slow; 143 WQ_NON_REENTRANT | WQ_RESCUER, 0);
144 if (!gfs_recovery_wq)
145 goto fail_wq;
144 146
145 gfs2_register_debugfs(); 147 gfs2_register_debugfs();
146 148
@@ -148,7 +150,7 @@ static int __init init_gfs2_fs(void)
148 150
149 return 0; 151 return 0;
150 152
151fail_slow: 153fail_wq:
152 unregister_filesystem(&gfs2meta_fs_type); 154 unregister_filesystem(&gfs2meta_fs_type);
153fail_unregister: 155fail_unregister:
154 unregister_filesystem(&gfs2_fs_type); 156 unregister_filesystem(&gfs2_fs_type);
@@ -190,7 +192,7 @@ static void __exit exit_gfs2_fs(void)
190 gfs2_unregister_debugfs(); 192 gfs2_unregister_debugfs();
191 unregister_filesystem(&gfs2_fs_type); 193 unregister_filesystem(&gfs2_fs_type);
192 unregister_filesystem(&gfs2meta_fs_type); 194 unregister_filesystem(&gfs2meta_fs_type);
193 slow_work_unregister_user(THIS_MODULE); 195 destroy_workqueue(gfs_recovery_wq);
194 196
195 kmem_cache_destroy(gfs2_quotad_cachep); 197 kmem_cache_destroy(gfs2_quotad_cachep);
196 kmem_cache_destroy(gfs2_rgrpd_cachep); 198 kmem_cache_destroy(gfs2_rgrpd_cachep);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 3593b3a7290e..4f44bdeb2f03 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -17,7 +17,6 @@
17#include <linux/namei.h> 17#include <linux/namei.h>
18#include <linux/mount.h> 18#include <linux/mount.h>
19#include <linux/gfs2_ondisk.h> 19#include <linux/gfs2_ondisk.h>
20#include <linux/slow-work.h>
21#include <linux/quotaops.h> 20#include <linux/quotaops.h>
22 21
23#include "gfs2.h" 22#include "gfs2.h"
@@ -76,7 +75,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
76 75
77 sb->s_fs_info = sdp; 76 sb->s_fs_info = sdp;
78 sdp->sd_vfs = sb; 77 sdp->sd_vfs = sb;
79 78 set_bit(SDF_NOJOURNALID, &sdp->sd_flags);
80 gfs2_tune_init(&sdp->sd_tune); 79 gfs2_tune_init(&sdp->sd_tune);
81 80
82 init_waitqueue_head(&sdp->sd_glock_wait); 81 init_waitqueue_head(&sdp->sd_glock_wait);
@@ -673,7 +672,7 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
673 break; 672 break;
674 673
675 INIT_LIST_HEAD(&jd->extent_list); 674 INIT_LIST_HEAD(&jd->extent_list);
676 slow_work_init(&jd->jd_work, &gfs2_recover_ops); 675 INIT_WORK(&jd->jd_work, gfs2_recover_func);
677 jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1); 676 jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1);
678 if (!jd->jd_inode || IS_ERR(jd->jd_inode)) { 677 if (!jd->jd_inode || IS_ERR(jd->jd_inode)) {
679 if (!jd->jd_inode) 678 if (!jd->jd_inode)
@@ -782,7 +781,8 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
782 if (sdp->sd_lockstruct.ls_first) { 781 if (sdp->sd_lockstruct.ls_first) {
783 unsigned int x; 782 unsigned int x;
784 for (x = 0; x < sdp->sd_journals; x++) { 783 for (x = 0; x < sdp->sd_journals; x++) {
785 error = gfs2_recover_journal(gfs2_jdesc_find(sdp, x)); 784 error = gfs2_recover_journal(gfs2_jdesc_find(sdp, x),
785 true);
786 if (error) { 786 if (error) {
787 fs_err(sdp, "error recovering journal %u: %d\n", 787 fs_err(sdp, "error recovering journal %u: %d\n",
788 x, error); 788 x, error);
@@ -792,7 +792,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
792 792
793 gfs2_others_may_mount(sdp); 793 gfs2_others_may_mount(sdp);
794 } else if (!sdp->sd_args.ar_spectator) { 794 } else if (!sdp->sd_args.ar_spectator) {
795 error = gfs2_recover_journal(sdp->sd_jdesc); 795 error = gfs2_recover_journal(sdp->sd_jdesc, true);
796 if (error) { 796 if (error) {
797 fs_err(sdp, "error recovering my journal: %d\n", error); 797 fs_err(sdp, "error recovering my journal: %d\n", error);
798 goto fail_jinode_gh; 798 goto fail_jinode_gh;
@@ -1050,7 +1050,8 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
1050 ret = match_int(&tmp[0], &option); 1050 ret = match_int(&tmp[0], &option);
1051 if (ret || option < 0) 1051 if (ret || option < 0)
1052 goto hostdata_error; 1052 goto hostdata_error;
1053 ls->ls_jid = option; 1053 if (test_and_clear_bit(SDF_NOJOURNALID, &sdp->sd_flags))
1054 ls->ls_jid = option;
1054 break; 1055 break;
1055 case Opt_id: 1056 case Opt_id:
1056 /* Obsolete, but left for backward compat purposes */ 1057 /* Obsolete, but left for backward compat purposes */
@@ -1102,6 +1103,24 @@ void gfs2_lm_unmount(struct gfs2_sbd *sdp)
1102 lm->lm_unmount(sdp); 1103 lm->lm_unmount(sdp);
1103} 1104}
1104 1105
1106static int gfs2_journalid_wait(void *word)
1107{
1108 if (signal_pending(current))
1109 return -EINTR;
1110 schedule();
1111 return 0;
1112}
1113
1114static int wait_on_journal(struct gfs2_sbd *sdp)
1115{
1116 if (sdp->sd_args.ar_spectator)
1117 return 0;
1118 if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
1119 return 0;
1120
1121 return wait_on_bit(&sdp->sd_flags, SDF_NOJOURNALID, gfs2_journalid_wait, TASK_INTERRUPTIBLE);
1122}
1123
1105void gfs2_online_uevent(struct gfs2_sbd *sdp) 1124void gfs2_online_uevent(struct gfs2_sbd *sdp)
1106{ 1125{
1107 struct super_block *sb = sdp->sd_vfs; 1126 struct super_block *sb = sdp->sd_vfs;
@@ -1194,6 +1213,10 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
1194 if (error) 1213 if (error)
1195 goto fail_locking; 1214 goto fail_locking;
1196 1215
1216 error = wait_on_journal(sdp);
1217 if (error)
1218 goto fail_sb;
1219
1197 error = init_inodes(sdp, DO); 1220 error = init_inodes(sdp, DO);
1198 if (error) 1221 if (error)
1199 goto fail_sb; 1222 goto fail_sb;
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 98cdd05f3316..1009be2c9737 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -1072,7 +1072,7 @@ int gfs2_permission(struct inode *inode, int mask)
1072} 1072}
1073 1073
1074/* 1074/*
1075 * XXX: should be changed to have proper ordering by opencoding simple_setsize 1075 * XXX(truncate): the truncate_setsize calls should be moved to the end.
1076 */ 1076 */
1077static int setattr_size(struct inode *inode, struct iattr *attr) 1077static int setattr_size(struct inode *inode, struct iattr *attr)
1078{ 1078{
@@ -1084,10 +1084,8 @@ static int setattr_size(struct inode *inode, struct iattr *attr)
1084 error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); 1084 error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
1085 if (error) 1085 if (error)
1086 return error; 1086 return error;
1087 error = simple_setsize(inode, attr->ia_size); 1087 truncate_setsize(inode, attr->ia_size);
1088 gfs2_trans_end(sdp); 1088 gfs2_trans_end(sdp);
1089 if (error)
1090 return error;
1091 } 1089 }
1092 1090
1093 error = gfs2_truncatei(ip, attr->ia_size); 1091 error = gfs2_truncatei(ip, attr->ia_size);
@@ -1136,8 +1134,16 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1136 if (error) 1134 if (error)
1137 goto out_end_trans; 1135 goto out_end_trans;
1138 1136
1139 error = inode_setattr(inode, attr); 1137 if ((attr->ia_valid & ATTR_SIZE) &&
1140 gfs2_assert_warn(sdp, !error); 1138 attr->ia_size != i_size_read(inode)) {
1139 int error;
1140
1141 error = vmtruncate(inode, attr->ia_size);
1142 gfs2_assert_warn(sdp, !error);
1143 }
1144
1145 setattr_copy(inode, attr);
1146 mark_inode_dirty(inode);
1141 1147
1142 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1148 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1143 gfs2_dinode_out(ip, dibh->b_data); 1149 gfs2_dinode_out(ip, dibh->b_data);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 8f02d3db8f42..1bc6b5695e6d 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -787,15 +787,9 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
787 goto out; 787 goto out;
788 788
789 for (x = 0; x < num_qd; x++) { 789 for (x = 0; x < num_qd; x++) {
790 int alloc_required;
791
792 offset = qd2offset(qda[x]); 790 offset = qd2offset(qda[x]);
793 error = gfs2_write_alloc_required(ip, offset, 791 if (gfs2_write_alloc_required(ip, offset,
794 sizeof(struct gfs2_quota), 792 sizeof(struct gfs2_quota)))
795 &alloc_required);
796 if (error)
797 goto out_gunlock;
798 if (alloc_required)
799 nalloc++; 793 nalloc++;
800 } 794 }
801 795
@@ -1455,10 +1449,10 @@ static int gfs2_quota_get_xstate(struct super_block *sb,
1455 1449
1456 switch (sdp->sd_args.ar_quota) { 1450 switch (sdp->sd_args.ar_quota) {
1457 case GFS2_QUOTA_ON: 1451 case GFS2_QUOTA_ON:
1458 fqs->qs_flags |= (XFS_QUOTA_UDQ_ENFD | XFS_QUOTA_GDQ_ENFD); 1452 fqs->qs_flags |= (FS_QUOTA_UDQ_ENFD | FS_QUOTA_GDQ_ENFD);
1459 /*FALLTHRU*/ 1453 /*FALLTHRU*/
1460 case GFS2_QUOTA_ACCOUNT: 1454 case GFS2_QUOTA_ACCOUNT:
1461 fqs->qs_flags |= (XFS_QUOTA_UDQ_ACCT | XFS_QUOTA_GDQ_ACCT); 1455 fqs->qs_flags |= (FS_QUOTA_UDQ_ACCT | FS_QUOTA_GDQ_ACCT);
1462 break; 1456 break;
1463 case GFS2_QUOTA_OFF: 1457 case GFS2_QUOTA_OFF:
1464 break; 1458 break;
@@ -1504,7 +1498,7 @@ static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id,
1504 1498
1505 qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; 1499 qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb;
1506 fdq->d_version = FS_DQUOT_VERSION; 1500 fdq->d_version = FS_DQUOT_VERSION;
1507 fdq->d_flags = (type == QUOTA_USER) ? XFS_USER_QUOTA : XFS_GROUP_QUOTA; 1501 fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA;
1508 fdq->d_id = id; 1502 fdq->d_id = id;
1509 fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit); 1503 fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit);
1510 fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn); 1504 fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn);
@@ -1539,12 +1533,12 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1539 switch(type) { 1533 switch(type) {
1540 case USRQUOTA: 1534 case USRQUOTA:
1541 type = QUOTA_USER; 1535 type = QUOTA_USER;
1542 if (fdq->d_flags != XFS_USER_QUOTA) 1536 if (fdq->d_flags != FS_USER_QUOTA)
1543 return -EINVAL; 1537 return -EINVAL;
1544 break; 1538 break;
1545 case GRPQUOTA: 1539 case GRPQUOTA:
1546 type = QUOTA_GROUP; 1540 type = QUOTA_GROUP;
1547 if (fdq->d_flags != XFS_GROUP_QUOTA) 1541 if (fdq->d_flags != FS_GROUP_QUOTA)
1548 return -EINVAL; 1542 return -EINVAL;
1549 break; 1543 break;
1550 default: 1544 default:
@@ -1584,10 +1578,7 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1584 goto out_i; 1578 goto out_i;
1585 1579
1586 offset = qd2offset(qd); 1580 offset = qd2offset(qd);
1587 error = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota), 1581 alloc_required = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota));
1588 &alloc_required);
1589 if (error)
1590 goto out_i;
1591 if (alloc_required) { 1582 if (alloc_required) {
1592 al = gfs2_alloc_get(ip); 1583 al = gfs2_alloc_get(ip);
1593 if (al == NULL) 1584 if (al == NULL)
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 4b9bece3d437..f7f89a94a5a4 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -14,7 +14,6 @@
14#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h> 15#include <linux/gfs2_ondisk.h>
16#include <linux/crc32.h> 16#include <linux/crc32.h>
17#include <linux/slow-work.h>
18 17
19#include "gfs2.h" 18#include "gfs2.h"
20#include "incore.h" 19#include "incore.h"
@@ -28,6 +27,8 @@
28#include "util.h" 27#include "util.h"
29#include "dir.h" 28#include "dir.h"
30 29
30struct workqueue_struct *gfs_recovery_wq;
31
31int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, 32int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
32 struct buffer_head **bh) 33 struct buffer_head **bh)
33{ 34{
@@ -443,23 +444,7 @@ static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
443 kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp); 444 kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
444} 445}
445 446
446static int gfs2_recover_get_ref(struct slow_work *work) 447void gfs2_recover_func(struct work_struct *work)
447{
448 struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
449 if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags))
450 return -EBUSY;
451 return 0;
452}
453
454static void gfs2_recover_put_ref(struct slow_work *work)
455{
456 struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
457 clear_bit(JDF_RECOVERY, &jd->jd_flags);
458 smp_mb__after_clear_bit();
459 wake_up_bit(&jd->jd_flags, JDF_RECOVERY);
460}
461
462static void gfs2_recover_work(struct slow_work *work)
463{ 448{
464 struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work); 449 struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
465 struct gfs2_inode *ip = GFS2_I(jd->jd_inode); 450 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
@@ -578,7 +563,7 @@ static void gfs2_recover_work(struct slow_work *work)
578 gfs2_glock_dq_uninit(&j_gh); 563 gfs2_glock_dq_uninit(&j_gh);
579 564
580 fs_info(sdp, "jid=%u: Done\n", jd->jd_jid); 565 fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
581 return; 566 goto done;
582 567
583fail_gunlock_tr: 568fail_gunlock_tr:
584 gfs2_glock_dq_uninit(&t_gh); 569 gfs2_glock_dq_uninit(&t_gh);
@@ -590,32 +575,35 @@ fail_gunlock_j:
590 } 575 }
591 576
592 fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done"); 577 fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");
593
594fail: 578fail:
595 gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP); 579 gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
580done:
581 clear_bit(JDF_RECOVERY, &jd->jd_flags);
582 smp_mb__after_clear_bit();
583 wake_up_bit(&jd->jd_flags, JDF_RECOVERY);
596} 584}
597 585
598struct slow_work_ops gfs2_recover_ops = {
599 .owner = THIS_MODULE,
600 .get_ref = gfs2_recover_get_ref,
601 .put_ref = gfs2_recover_put_ref,
602 .execute = gfs2_recover_work,
603};
604
605
606static int gfs2_recovery_wait(void *word) 586static int gfs2_recovery_wait(void *word)
607{ 587{
608 schedule(); 588 schedule();
609 return 0; 589 return 0;
610} 590}
611 591
612int gfs2_recover_journal(struct gfs2_jdesc *jd) 592int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
613{ 593{
614 int rv; 594 int rv;
615 rv = slow_work_enqueue(&jd->jd_work); 595
616 if (rv) 596 if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags))
617 return rv; 597 return -EBUSY;
618 wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait, TASK_UNINTERRUPTIBLE); 598
599 /* we have JDF_RECOVERY, queue should always succeed */
600 rv = queue_work(gfs_recovery_wq, &jd->jd_work);
601 BUG_ON(!rv);
602
603 if (wait)
604 wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait,
605 TASK_UNINTERRUPTIBLE);
606
619 return 0; 607 return 0;
620} 608}
621 609
diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h
index 1616ac22569a..2226136c7647 100644
--- a/fs/gfs2/recovery.h
+++ b/fs/gfs2/recovery.h
@@ -12,6 +12,8 @@
12 12
13#include "incore.h" 13#include "incore.h"
14 14
15extern struct workqueue_struct *gfs_recovery_wq;
16
15static inline void gfs2_replay_incr_blk(struct gfs2_sbd *sdp, unsigned int *blk) 17static inline void gfs2_replay_incr_blk(struct gfs2_sbd *sdp, unsigned int *blk)
16{ 18{
17 if (++*blk == sdp->sd_jdesc->jd_blocks) 19 if (++*blk == sdp->sd_jdesc->jd_blocks)
@@ -27,8 +29,8 @@ extern void gfs2_revoke_clean(struct gfs2_sbd *sdp);
27 29
28extern int gfs2_find_jhead(struct gfs2_jdesc *jd, 30extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
29 struct gfs2_log_header_host *head); 31 struct gfs2_log_header_host *head);
30extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd); 32extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd, bool wait);
31extern struct slow_work_ops gfs2_recover_ops; 33extern void gfs2_recover_func(struct work_struct *work);
32 34
33#endif /* __RECOVERY_DOT_H__ */ 35#endif /* __RECOVERY_DOT_H__ */
34 36
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 4d1aad38f1b1..77cb9f830ee4 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -342,8 +342,6 @@ int gfs2_jdesc_check(struct gfs2_jdesc *jd)
342{ 342{
343 struct gfs2_inode *ip = GFS2_I(jd->jd_inode); 343 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
344 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); 344 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
345 int ar;
346 int error;
347 345
348 if (ip->i_disksize < (8 << 20) || ip->i_disksize > (1 << 30) || 346 if (ip->i_disksize < (8 << 20) || ip->i_disksize > (1 << 30) ||
349 (ip->i_disksize & (sdp->sd_sb.sb_bsize - 1))) { 347 (ip->i_disksize & (sdp->sd_sb.sb_bsize - 1))) {
@@ -352,13 +350,12 @@ int gfs2_jdesc_check(struct gfs2_jdesc *jd)
352 } 350 }
353 jd->jd_blocks = ip->i_disksize >> sdp->sd_sb.sb_bsize_shift; 351 jd->jd_blocks = ip->i_disksize >> sdp->sd_sb.sb_bsize_shift;
354 352
355 error = gfs2_write_alloc_required(ip, 0, ip->i_disksize, &ar); 353 if (gfs2_write_alloc_required(ip, 0, ip->i_disksize)) {
356 if (!error && ar) {
357 gfs2_consist_inode(ip); 354 gfs2_consist_inode(ip);
358 error = -EIO; 355 return -EIO;
359 } 356 }
360 357
361 return error; 358 return 0;
362} 359}
363 360
364/** 361/**
@@ -1191,7 +1188,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
1191 * node for later deallocation. 1188 * node for later deallocation.
1192 */ 1189 */
1193 1190
1194static void gfs2_drop_inode(struct inode *inode) 1191static int gfs2_drop_inode(struct inode *inode)
1195{ 1192{
1196 struct gfs2_inode *ip = GFS2_I(inode); 1193 struct gfs2_inode *ip = GFS2_I(inode);
1197 1194
@@ -1200,26 +1197,7 @@ static void gfs2_drop_inode(struct inode *inode)
1200 if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags)) 1197 if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags))
1201 clear_nlink(inode); 1198 clear_nlink(inode);
1202 } 1199 }
1203 generic_drop_inode(inode); 1200 return generic_drop_inode(inode);
1204}
1205
1206/**
1207 * gfs2_clear_inode - Deallocate an inode when VFS is done with it
1208 * @inode: The VFS inode
1209 *
1210 */
1211
1212static void gfs2_clear_inode(struct inode *inode)
1213{
1214 struct gfs2_inode *ip = GFS2_I(inode);
1215
1216 ip->i_gl->gl_object = NULL;
1217 gfs2_glock_put(ip->i_gl);
1218 ip->i_gl = NULL;
1219 if (ip->i_iopen_gh.gh_gl) {
1220 ip->i_iopen_gh.gh_gl->gl_object = NULL;
1221 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
1222 }
1223} 1201}
1224 1202
1225static int is_ancestor(const struct dentry *d1, const struct dentry *d2) 1203static int is_ancestor(const struct dentry *d1, const struct dentry *d2)
@@ -1347,13 +1325,16 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1347 * is safe, just less efficient. 1325 * is safe, just less efficient.
1348 */ 1326 */
1349 1327
1350static void gfs2_delete_inode(struct inode *inode) 1328static void gfs2_evict_inode(struct inode *inode)
1351{ 1329{
1352 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; 1330 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
1353 struct gfs2_inode *ip = GFS2_I(inode); 1331 struct gfs2_inode *ip = GFS2_I(inode);
1354 struct gfs2_holder gh; 1332 struct gfs2_holder gh;
1355 int error; 1333 int error;
1356 1334
1335 if (inode->i_nlink)
1336 goto out;
1337
1357 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 1338 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1358 if (unlikely(error)) { 1339 if (unlikely(error)) {
1359 gfs2_glock_dq_uninit(&ip->i_iopen_gh); 1340 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
@@ -1407,10 +1388,18 @@ out_unlock:
1407 gfs2_holder_uninit(&ip->i_iopen_gh); 1388 gfs2_holder_uninit(&ip->i_iopen_gh);
1408 gfs2_glock_dq_uninit(&gh); 1389 gfs2_glock_dq_uninit(&gh);
1409 if (error && error != GLR_TRYFAILED && error != -EROFS) 1390 if (error && error != GLR_TRYFAILED && error != -EROFS)
1410 fs_warn(sdp, "gfs2_delete_inode: %d\n", error); 1391 fs_warn(sdp, "gfs2_evict_inode: %d\n", error);
1411out: 1392out:
1412 truncate_inode_pages(&inode->i_data, 0); 1393 truncate_inode_pages(&inode->i_data, 0);
1413 clear_inode(inode); 1394 end_writeback(inode);
1395
1396 ip->i_gl->gl_object = NULL;
1397 gfs2_glock_put(ip->i_gl);
1398 ip->i_gl = NULL;
1399 if (ip->i_iopen_gh.gh_gl) {
1400 ip->i_iopen_gh.gh_gl->gl_object = NULL;
1401 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
1402 }
1414} 1403}
1415 1404
1416static struct inode *gfs2_alloc_inode(struct super_block *sb) 1405static struct inode *gfs2_alloc_inode(struct super_block *sb)
@@ -1434,14 +1423,13 @@ const struct super_operations gfs2_super_ops = {
1434 .alloc_inode = gfs2_alloc_inode, 1423 .alloc_inode = gfs2_alloc_inode,
1435 .destroy_inode = gfs2_destroy_inode, 1424 .destroy_inode = gfs2_destroy_inode,
1436 .write_inode = gfs2_write_inode, 1425 .write_inode = gfs2_write_inode,
1437 .delete_inode = gfs2_delete_inode, 1426 .evict_inode = gfs2_evict_inode,
1438 .put_super = gfs2_put_super, 1427 .put_super = gfs2_put_super,
1439 .sync_fs = gfs2_sync_fs, 1428 .sync_fs = gfs2_sync_fs,
1440 .freeze_fs = gfs2_freeze, 1429 .freeze_fs = gfs2_freeze,
1441 .unfreeze_fs = gfs2_unfreeze, 1430 .unfreeze_fs = gfs2_unfreeze,
1442 .statfs = gfs2_statfs, 1431 .statfs = gfs2_statfs,
1443 .remount_fs = gfs2_remount_fs, 1432 .remount_fs = gfs2_remount_fs,
1444 .clear_inode = gfs2_clear_inode,
1445 .drop_inode = gfs2_drop_inode, 1433 .drop_inode = gfs2_drop_inode,
1446 .show_options = gfs2_show_options, 1434 .show_options = gfs2_show_options,
1447}; 1435};
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 37f5393e68e6..ccacffd2faaa 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -25,6 +25,7 @@
25#include "quota.h" 25#include "quota.h"
26#include "util.h" 26#include "util.h"
27#include "glops.h" 27#include "glops.h"
28#include "recovery.h"
28 29
29struct gfs2_attr { 30struct gfs2_attr {
30 struct attribute attr; 31 struct attribute attr;
@@ -325,6 +326,30 @@ static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf)
325 return sprintf(buf, "%d\n", ls->ls_first); 326 return sprintf(buf, "%d\n", ls->ls_first);
326} 327}
327 328
329static ssize_t lkfirst_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
330{
331 unsigned first;
332 int rv;
333
334 rv = sscanf(buf, "%u", &first);
335 if (rv != 1 || first > 1)
336 return -EINVAL;
337 spin_lock(&sdp->sd_jindex_spin);
338 rv = -EBUSY;
339 if (test_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0)
340 goto out;
341 rv = -EINVAL;
342 if (sdp->sd_args.ar_spectator)
343 goto out;
344 if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
345 goto out;
346 sdp->sd_lockstruct.ls_first = first;
347 rv = 0;
348out:
349 spin_unlock(&sdp->sd_jindex_spin);
350 return rv ? rv : len;
351}
352
328static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf) 353static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf)
329{ 354{
330 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 355 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
@@ -352,7 +377,7 @@ static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
352 list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { 377 list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
353 if (jd->jd_jid != jid) 378 if (jd->jd_jid != jid)
354 continue; 379 continue;
355 rv = slow_work_enqueue(&jd->jd_work); 380 rv = gfs2_recover_journal(jd, false);
356 break; 381 break;
357 } 382 }
358out: 383out:
@@ -377,14 +402,41 @@ static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf)
377 return sprintf(buf, "%u\n", sdp->sd_lockstruct.ls_jid); 402 return sprintf(buf, "%u\n", sdp->sd_lockstruct.ls_jid);
378} 403}
379 404
405static ssize_t jid_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
406{
407 unsigned jid;
408 int rv;
409
410 rv = sscanf(buf, "%u", &jid);
411 if (rv != 1)
412 return -EINVAL;
413
414 spin_lock(&sdp->sd_jindex_spin);
415 rv = -EINVAL;
416 if (sdp->sd_args.ar_spectator)
417 goto out;
418 if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
419 goto out;
420 rv = -EBUSY;
421 if (test_and_clear_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0)
422 goto out;
423 sdp->sd_lockstruct.ls_jid = jid;
424 smp_mb__after_clear_bit();
425 wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID);
426 rv = 0;
427out:
428 spin_unlock(&sdp->sd_jindex_spin);
429 return rv ? rv : len;
430}
431
380#define GDLM_ATTR(_name,_mode,_show,_store) \ 432#define GDLM_ATTR(_name,_mode,_show,_store) \
381static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) 433static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
382 434
383GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); 435GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
384GDLM_ATTR(block, 0644, block_show, block_store); 436GDLM_ATTR(block, 0644, block_show, block_store);
385GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); 437GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
386GDLM_ATTR(jid, 0444, jid_show, NULL); 438GDLM_ATTR(jid, 0644, jid_show, jid_store);
387GDLM_ATTR(first, 0444, lkfirst_show, NULL); 439GDLM_ATTR(first, 0644, lkfirst_show, lkfirst_store);
388GDLM_ATTR(first_done, 0444, first_done_show, NULL); 440GDLM_ATTR(first_done, 0444, first_done_show, NULL);
389GDLM_ATTR(recover, 0600, NULL, recover_store); 441GDLM_ATTR(recover, 0600, NULL, recover_store);
390GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); 442GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
@@ -564,7 +616,7 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
564 616
565 add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name); 617 add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
566 add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); 618 add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
567 if (!sdp->sd_args.ar_spectator) 619 if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags))
568 add_uevent_var(env, "JOURNALID=%u", sdp->sd_lockstruct.ls_jid); 620 add_uevent_var(env, "JOURNALID=%u", sdp->sd_lockstruct.ls_jid);
569 if (gfs2_uuid_valid(uuid)) 621 if (gfs2_uuid_valid(uuid))
570 add_uevent_var(env, "UUID=%pUB", uuid); 622 add_uevent_var(env, "UUID=%pUB", uuid);
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 82f93da00d1b..776af6eb4bcb 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -1296,6 +1296,7 @@ fail:
1296 1296
1297int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) 1297int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
1298{ 1298{
1299 struct inode *inode = &ip->i_inode;
1299 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1300 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1300 struct gfs2_ea_location el; 1301 struct gfs2_ea_location el;
1301 struct buffer_head *dibh; 1302 struct buffer_head *dibh;
@@ -1321,14 +1322,25 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
1321 return error; 1322 return error;
1322 1323
1323 error = gfs2_meta_inode_buffer(ip, &dibh); 1324 error = gfs2_meta_inode_buffer(ip, &dibh);
1324 if (!error) { 1325 if (error)
1325 error = inode_setattr(&ip->i_inode, attr); 1326 goto out_trans_end;
1326 gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error); 1327
1327 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1328 if ((attr->ia_valid & ATTR_SIZE) &&
1328 gfs2_dinode_out(ip, dibh->b_data); 1329 attr->ia_size != i_size_read(inode)) {
1329 brelse(dibh); 1330 int error;
1331
1332 error = vmtruncate(inode, attr->ia_size);
1333 gfs2_assert_warn(GFS2_SB(inode), !error);
1330 } 1334 }
1331 1335
1336 setattr_copy(inode, attr);
1337 mark_inode_dirty(inode);
1338
1339 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1340 gfs2_dinode_out(ip, dibh->b_data);
1341 brelse(dibh);
1342
1343out_trans_end:
1332 gfs2_trans_end(sdp); 1344 gfs2_trans_end(sdp);
1333 return error; 1345 return error;
1334} 1346}
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index fe35e3b626c4..4f55651aaa51 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -193,7 +193,7 @@ extern int hfs_inode_setattr(struct dentry *, struct iattr *);
193extern void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext, 193extern void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext,
194 __be32 log_size, __be32 phys_size, u32 clump_size); 194 __be32 log_size, __be32 phys_size, u32 clump_size);
195extern struct inode *hfs_iget(struct super_block *, struct hfs_cat_key *, hfs_cat_rec *); 195extern struct inode *hfs_iget(struct super_block *, struct hfs_cat_key *, hfs_cat_rec *);
196extern void hfs_clear_inode(struct inode *); 196extern void hfs_evict_inode(struct inode *);
197extern void hfs_delete_inode(struct inode *); 197extern void hfs_delete_inode(struct inode *);
198 198
199/* attr.c */ 199/* attr.c */
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 14f5cb1b9fdc..397b7adc7ce6 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -39,10 +39,19 @@ static int hfs_write_begin(struct file *file, struct address_space *mapping,
39 loff_t pos, unsigned len, unsigned flags, 39 loff_t pos, unsigned len, unsigned flags,
40 struct page **pagep, void **fsdata) 40 struct page **pagep, void **fsdata)
41{ 41{
42 int ret;
43
42 *pagep = NULL; 44 *pagep = NULL;
43 return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 45 ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
44 hfs_get_block, 46 hfs_get_block,
45 &HFS_I(mapping->host)->phys_size); 47 &HFS_I(mapping->host)->phys_size);
48 if (unlikely(ret)) {
49 loff_t isize = mapping->host->i_size;
50 if (pos + len > isize)
51 vmtruncate(mapping->host, isize);
52 }
53
54 return ret;
46} 55}
47 56
48static sector_t hfs_bmap(struct address_space *mapping, sector_t block) 57static sector_t hfs_bmap(struct address_space *mapping, sector_t block)
@@ -112,9 +121,24 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,
112{ 121{
113 struct file *file = iocb->ki_filp; 122 struct file *file = iocb->ki_filp;
114 struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; 123 struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
124 ssize_t ret;
115 125
116 return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 126 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
117 offset, nr_segs, hfs_get_block, NULL); 127 offset, nr_segs, hfs_get_block, NULL);
128
129 /*
130 * In case of error extending write may have instantiated a few
131 * blocks outside i_size. Trim these off again.
132 */
133 if (unlikely((rw & WRITE) && ret < 0)) {
134 loff_t isize = i_size_read(inode);
135 loff_t end = offset + iov_length(iov, nr_segs);
136
137 if (end > isize)
138 vmtruncate(inode, isize);
139 }
140
141 return ret;
118} 142}
119 143
120static int hfs_writepages(struct address_space *mapping, 144static int hfs_writepages(struct address_space *mapping,
@@ -507,8 +531,10 @@ out:
507 return NULL; 531 return NULL;
508} 532}
509 533
510void hfs_clear_inode(struct inode *inode) 534void hfs_evict_inode(struct inode *inode)
511{ 535{
536 truncate_inode_pages(&inode->i_data, 0);
537 end_writeback(inode);
512 if (HFS_IS_RSRC(inode) && HFS_I(inode)->rsrc_inode) { 538 if (HFS_IS_RSRC(inode) && HFS_I(inode)->rsrc_inode) {
513 HFS_I(HFS_I(inode)->rsrc_inode)->rsrc_inode = NULL; 539 HFS_I(HFS_I(inode)->rsrc_inode)->rsrc_inode = NULL;
514 iput(HFS_I(inode)->rsrc_inode); 540 iput(HFS_I(inode)->rsrc_inode);
@@ -588,13 +614,43 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr)
588 attr->ia_mode = inode->i_mode & ~S_IWUGO; 614 attr->ia_mode = inode->i_mode & ~S_IWUGO;
589 attr->ia_mode &= S_ISDIR(inode->i_mode) ? ~hsb->s_dir_umask: ~hsb->s_file_umask; 615 attr->ia_mode &= S_ISDIR(inode->i_mode) ? ~hsb->s_dir_umask: ~hsb->s_file_umask;
590 } 616 }
591 error = inode_setattr(inode, attr);
592 if (error)
593 return error;
594 617
618 if ((attr->ia_valid & ATTR_SIZE) &&
619 attr->ia_size != i_size_read(inode)) {
620 error = vmtruncate(inode, attr->ia_size);
621 if (error)
622 return error;
623 }
624
625 setattr_copy(inode, attr);
626 mark_inode_dirty(inode);
595 return 0; 627 return 0;
596} 628}
597 629
630static int hfs_file_fsync(struct file *filp, int datasync)
631{
632 struct inode *inode = filp->f_mapping->host;
633 struct super_block * sb;
634 int ret, err;
635
636 /* sync the inode to buffers */
637 ret = write_inode_now(inode, 0);
638
639 /* sync the superblock to buffers */
640 sb = inode->i_sb;
641 if (sb->s_dirt) {
642 lock_super(sb);
643 sb->s_dirt = 0;
644 if (!(sb->s_flags & MS_RDONLY))
645 hfs_mdb_commit(sb);
646 unlock_super(sb);
647 }
648 /* .. finally sync the buffers to disk */
649 err = sync_blockdev(sb->s_bdev);
650 if (!ret)
651 ret = err;
652 return ret;
653}
598 654
599static const struct file_operations hfs_file_operations = { 655static const struct file_operations hfs_file_operations = {
600 .llseek = generic_file_llseek, 656 .llseek = generic_file_llseek,
@@ -604,7 +660,7 @@ static const struct file_operations hfs_file_operations = {
604 .aio_write = generic_file_aio_write, 660 .aio_write = generic_file_aio_write,
605 .mmap = generic_file_mmap, 661 .mmap = generic_file_mmap,
606 .splice_read = generic_file_splice_read, 662 .splice_read = generic_file_splice_read,
607 .fsync = file_fsync, 663 .fsync = hfs_file_fsync,
608 .open = hfs_file_open, 664 .open = hfs_file_open,
609 .release = hfs_file_release, 665 .release = hfs_file_release,
610}; 666};
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 0a81eb7111f3..34235d4bf08b 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -181,7 +181,7 @@ static const struct super_operations hfs_super_operations = {
181 .alloc_inode = hfs_alloc_inode, 181 .alloc_inode = hfs_alloc_inode,
182 .destroy_inode = hfs_destroy_inode, 182 .destroy_inode = hfs_destroy_inode,
183 .write_inode = hfs_write_inode, 183 .write_inode = hfs_write_inode,
184 .clear_inode = hfs_clear_inode, 184 .evict_inode = hfs_evict_inode,
185 .put_super = hfs_put_super, 185 .put_super = hfs_put_super,
186 .write_super = hfs_write_super, 186 .write_super = hfs_write_super,
187 .sync_fs = hfs_sync_fs, 187 .sync_fs = hfs_sync_fs,
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 6505c30ad965..dc856be3c2b0 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -351,6 +351,7 @@ int hfsplus_show_options(struct seq_file *, struct vfsmount *);
351 351
352/* super.c */ 352/* super.c */
353struct inode *hfsplus_iget(struct super_block *, unsigned long); 353struct inode *hfsplus_iget(struct super_block *, unsigned long);
354int hfsplus_sync_fs(struct super_block *sb, int wait);
354 355
355/* tables.c */ 356/* tables.c */
356extern u16 hfsplus_case_fold_table[]; 357extern u16 hfsplus_case_fold_table[];
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 9bbb82924a22..c5a979d62c65 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -31,10 +31,19 @@ static int hfsplus_write_begin(struct file *file, struct address_space *mapping,
31 loff_t pos, unsigned len, unsigned flags, 31 loff_t pos, unsigned len, unsigned flags,
32 struct page **pagep, void **fsdata) 32 struct page **pagep, void **fsdata)
33{ 33{
34 int ret;
35
34 *pagep = NULL; 36 *pagep = NULL;
35 return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 37 ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
36 hfsplus_get_block, 38 hfsplus_get_block,
37 &HFSPLUS_I(mapping->host).phys_size); 39 &HFSPLUS_I(mapping->host).phys_size);
40 if (unlikely(ret)) {
41 loff_t isize = mapping->host->i_size;
42 if (pos + len > isize)
43 vmtruncate(mapping->host, isize);
44 }
45
46 return ret;
38} 47}
39 48
40static sector_t hfsplus_bmap(struct address_space *mapping, sector_t block) 49static sector_t hfsplus_bmap(struct address_space *mapping, sector_t block)
@@ -105,9 +114,24 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,
105{ 114{
106 struct file *file = iocb->ki_filp; 115 struct file *file = iocb->ki_filp;
107 struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; 116 struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
117 ssize_t ret;
108 118
109 return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 119 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
110 offset, nr_segs, hfsplus_get_block, NULL); 120 offset, nr_segs, hfsplus_get_block, NULL);
121
122 /*
123 * In case of error extending write may have instantiated a few
124 * blocks outside i_size. Trim these off again.
125 */
126 if (unlikely((rw & WRITE) && ret < 0)) {
127 loff_t isize = i_size_read(inode);
128 loff_t end = offset + iov_length(iov, nr_segs);
129
130 if (end > isize)
131 vmtruncate(inode, isize);
132 }
133
134 return ret;
111} 135}
112 136
113static int hfsplus_writepages(struct address_space *mapping, 137static int hfsplus_writepages(struct address_space *mapping,
@@ -266,9 +290,56 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
266 return 0; 290 return 0;
267} 291}
268 292
293static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
294{
295 struct inode *inode = dentry->d_inode;
296 int error;
297
298 error = inode_change_ok(inode, attr);
299 if (error)
300 return error;
301
302 if ((attr->ia_valid & ATTR_SIZE) &&
303 attr->ia_size != i_size_read(inode)) {
304 error = vmtruncate(inode, attr->ia_size);
305 if (error)
306 return error;
307 }
308
309 setattr_copy(inode, attr);
310 mark_inode_dirty(inode);
311 return 0;
312}
313
314static int hfsplus_file_fsync(struct file *filp, int datasync)
315{
316 struct inode *inode = filp->f_mapping->host;
317 struct super_block * sb;
318 int ret, err;
319
320 /* sync the inode to buffers */
321 ret = write_inode_now(inode, 0);
322
323 /* sync the superblock to buffers */
324 sb = inode->i_sb;
325 if (sb->s_dirt) {
326 if (!(sb->s_flags & MS_RDONLY))
327 hfsplus_sync_fs(sb, 1);
328 else
329 sb->s_dirt = 0;
330 }
331
332 /* .. finally sync the buffers to disk */
333 err = sync_blockdev(sb->s_bdev);
334 if (!ret)
335 ret = err;
336 return ret;
337}
338
269static const struct inode_operations hfsplus_file_inode_operations = { 339static const struct inode_operations hfsplus_file_inode_operations = {
270 .lookup = hfsplus_file_lookup, 340 .lookup = hfsplus_file_lookup,
271 .truncate = hfsplus_file_truncate, 341 .truncate = hfsplus_file_truncate,
342 .setattr = hfsplus_setattr,
272 .setxattr = hfsplus_setxattr, 343 .setxattr = hfsplus_setxattr,
273 .getxattr = hfsplus_getxattr, 344 .getxattr = hfsplus_getxattr,
274 .listxattr = hfsplus_listxattr, 345 .listxattr = hfsplus_listxattr,
@@ -282,7 +353,7 @@ static const struct file_operations hfsplus_file_operations = {
282 .aio_write = generic_file_aio_write, 353 .aio_write = generic_file_aio_write,
283 .mmap = generic_file_mmap, 354 .mmap = generic_file_mmap,
284 .splice_read = generic_file_splice_read, 355 .splice_read = generic_file_splice_read,
285 .fsync = file_fsync, 356 .fsync = hfsplus_file_fsync,
286 .open = hfsplus_file_open, 357 .open = hfsplus_file_open,
287 .release = hfsplus_file_release, 358 .release = hfsplus_file_release,
288 .unlocked_ioctl = hfsplus_ioctl, 359 .unlocked_ioctl = hfsplus_ioctl,
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 74b473a8ef92..3b55c050c742 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -145,16 +145,18 @@ static int hfsplus_write_inode(struct inode *inode,
145 return ret; 145 return ret;
146} 146}
147 147
148static void hfsplus_clear_inode(struct inode *inode) 148static void hfsplus_evict_inode(struct inode *inode)
149{ 149{
150 dprint(DBG_INODE, "hfsplus_clear_inode: %lu\n", inode->i_ino); 150 dprint(DBG_INODE, "hfsplus_evict_inode: %lu\n", inode->i_ino);
151 truncate_inode_pages(&inode->i_data, 0);
152 end_writeback(inode);
151 if (HFSPLUS_IS_RSRC(inode)) { 153 if (HFSPLUS_IS_RSRC(inode)) {
152 HFSPLUS_I(HFSPLUS_I(inode).rsrc_inode).rsrc_inode = NULL; 154 HFSPLUS_I(HFSPLUS_I(inode).rsrc_inode).rsrc_inode = NULL;
153 iput(HFSPLUS_I(inode).rsrc_inode); 155 iput(HFSPLUS_I(inode).rsrc_inode);
154 } 156 }
155} 157}
156 158
157static int hfsplus_sync_fs(struct super_block *sb, int wait) 159int hfsplus_sync_fs(struct super_block *sb, int wait)
158{ 160{
159 struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; 161 struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr;
160 162
@@ -293,7 +295,7 @@ static const struct super_operations hfsplus_sops = {
293 .alloc_inode = hfsplus_alloc_inode, 295 .alloc_inode = hfsplus_alloc_inode,
294 .destroy_inode = hfsplus_destroy_inode, 296 .destroy_inode = hfsplus_destroy_inode,
295 .write_inode = hfsplus_write_inode, 297 .write_inode = hfsplus_write_inode,
296 .clear_inode = hfsplus_clear_inode, 298 .evict_inode = hfsplus_evict_inode,
297 .put_super = hfsplus_put_super, 299 .put_super = hfsplus_put_super,
298 .write_super = hfsplus_write_super, 300 .write_super = hfsplus_write_super,
299 .sync_fs = hfsplus_sync_fs, 301 .sync_fs = hfsplus_sync_fs,
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index 2f34f8f2134b..6bbd75c5589b 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -53,18 +53,28 @@ struct hostfs_iattr {
53 struct timespec ia_ctime; 53 struct timespec ia_ctime;
54}; 54};
55 55
56extern int stat_file(const char *path, unsigned long long *inode_out, 56struct hostfs_stat {
57 int *mode_out, int *nlink_out, int *uid_out, int *gid_out, 57 unsigned long long ino;
58 unsigned long long *size_out, struct timespec *atime_out, 58 unsigned int mode;
59 struct timespec *mtime_out, struct timespec *ctime_out, 59 unsigned int nlink;
60 int *blksize_out, unsigned long long *blocks_out, int fd); 60 unsigned int uid;
61 unsigned int gid;
62 unsigned long long size;
63 struct timespec atime, mtime, ctime;
64 unsigned int blksize;
65 unsigned long long blocks;
66 unsigned int maj;
67 unsigned int min;
68};
69
70extern int stat_file(const char *path, struct hostfs_stat *p, int fd);
61extern int access_file(char *path, int r, int w, int x); 71extern int access_file(char *path, int r, int w, int x);
62extern int open_file(char *path, int r, int w, int append); 72extern int open_file(char *path, int r, int w, int append);
63extern int file_type(const char *path, int *maj, int *min);
64extern void *open_dir(char *path, int *err_out); 73extern void *open_dir(char *path, int *err_out);
65extern char *read_dir(void *stream, unsigned long long *pos, 74extern char *read_dir(void *stream, unsigned long long *pos,
66 unsigned long long *ino_out, int *len_out); 75 unsigned long long *ino_out, int *len_out);
67extern void close_file(void *stream); 76extern void close_file(void *stream);
77extern int replace_file(int oldfd, int fd);
68extern void close_dir(void *stream); 78extern void close_dir(void *stream);
69extern int read_file(int fd, unsigned long long *offset, char *buf, int len); 79extern int read_file(int fd, unsigned long long *offset, char *buf, int len);
70extern int write_file(int fd, unsigned long long *offset, const char *buf, 80extern int write_file(int fd, unsigned long long *offset, const char *buf,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 87ac1891a185..dd1e55535a4e 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -14,12 +14,12 @@
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/seq_file.h> 15#include <linux/seq_file.h>
16#include <linux/mount.h> 16#include <linux/mount.h>
17#include <linux/namei.h>
17#include "hostfs.h" 18#include "hostfs.h"
18#include "init.h" 19#include "init.h"
19#include "kern.h" 20#include "kern.h"
20 21
21struct hostfs_inode_info { 22struct hostfs_inode_info {
22 char *host_filename;
23 int fd; 23 int fd;
24 fmode_t mode; 24 fmode_t mode;
25 struct inode vfs_inode; 25 struct inode vfs_inode;
@@ -49,7 +49,7 @@ static int append = 0;
49 49
50static const struct inode_operations hostfs_iops; 50static const struct inode_operations hostfs_iops;
51static const struct inode_operations hostfs_dir_iops; 51static const struct inode_operations hostfs_dir_iops;
52static const struct address_space_operations hostfs_link_aops; 52static const struct inode_operations hostfs_link_iops;
53 53
54#ifndef MODULE 54#ifndef MODULE
55static int __init hostfs_args(char *options, int *add) 55static int __init hostfs_args(char *options, int *add)
@@ -90,71 +90,58 @@ __uml_setup("hostfs=", hostfs_args,
90); 90);
91#endif 91#endif
92 92
93static char *dentry_name(struct dentry *dentry, int extra) 93static char *__dentry_name(struct dentry *dentry, char *name)
94{ 94{
95 struct dentry *parent; 95 char *p = __dentry_path(dentry, name, PATH_MAX);
96 char *root, *name; 96 char *root;
97 int len; 97 size_t len;
98
99 len = 0;
100 parent = dentry;
101 while (parent->d_parent != parent) {
102 len += parent->d_name.len + 1;
103 parent = parent->d_parent;
104 }
105 98
106 root = HOSTFS_I(parent->d_inode)->host_filename; 99 spin_unlock(&dcache_lock);
107 len += strlen(root);
108 name = kmalloc(len + extra + 1, GFP_KERNEL);
109 if (name == NULL)
110 return NULL;
111 100
112 name[len] = '\0'; 101 root = dentry->d_sb->s_fs_info;
113 parent = dentry; 102 len = strlen(root);
114 while (parent->d_parent != parent) { 103 if (IS_ERR(p)) {
115 len -= parent->d_name.len + 1; 104 __putname(name);
116 name[len] = '/'; 105 return NULL;
117 strncpy(&name[len + 1], parent->d_name.name, 106 }
118 parent->d_name.len); 107 strncpy(name, root, PATH_MAX);
119 parent = parent->d_parent; 108 if (len > p - name) {
109 __putname(name);
110 return NULL;
111 }
112 if (p > name + len) {
113 char *s = name + len;
114 while ((*s++ = *p++) != '\0')
115 ;
120 } 116 }
121 strncpy(name, root, strlen(root));
122 return name; 117 return name;
123} 118}
124 119
125static char *inode_name(struct inode *ino, int extra) 120static char *dentry_name(struct dentry *dentry)
126{ 121{
127 struct dentry *dentry; 122 char *name = __getname();
123 if (!name)
124 return NULL;
128 125
129 dentry = list_entry(ino->i_dentry.next, struct dentry, d_alias); 126 spin_lock(&dcache_lock);
130 return dentry_name(dentry, extra); 127 return __dentry_name(dentry, name); /* will unlock */
131} 128}
132 129
133static int read_name(struct inode *ino, char *name) 130static char *inode_name(struct inode *ino)
134{ 131{
135 /* 132 struct dentry *dentry;
136 * The non-int inode fields are copied into ints by stat_file and 133 char *name = __getname();
137 * then copied into the inode because passing the actual pointers 134 if (!name)
138 * in and having them treated as int * breaks on big-endian machines 135 return NULL;
139 */
140 int err;
141 int i_mode, i_nlink, i_blksize;
142 unsigned long long i_size;
143 unsigned long long i_ino;
144 unsigned long long i_blocks;
145
146 err = stat_file(name, &i_ino, &i_mode, &i_nlink, &ino->i_uid,
147 &ino->i_gid, &i_size, &ino->i_atime, &ino->i_mtime,
148 &ino->i_ctime, &i_blksize, &i_blocks, -1);
149 if (err)
150 return err;
151 136
152 ino->i_ino = i_ino; 137 spin_lock(&dcache_lock);
153 ino->i_mode = i_mode; 138 if (list_empty(&ino->i_dentry)) {
154 ino->i_nlink = i_nlink; 139 spin_unlock(&dcache_lock);
155 ino->i_size = i_size; 140 __putname(name);
156 ino->i_blocks = i_blocks; 141 return NULL;
157 return 0; 142 }
143 dentry = list_first_entry(&ino->i_dentry, struct dentry, d_alias);
144 return __dentry_name(dentry, name); /* will unlock */
158} 145}
159 146
160static char *follow_link(char *link) 147static char *follow_link(char *link)
@@ -205,53 +192,11 @@ static char *follow_link(char *link)
205 return ERR_PTR(n); 192 return ERR_PTR(n);
206} 193}
207 194
208static int hostfs_read_inode(struct inode *ino)
209{
210 char *name;
211 int err = 0;
212
213 /*
214 * Unfortunately, we are called from iget() when we don't have a dentry
215 * allocated yet.
216 */
217 if (list_empty(&ino->i_dentry))
218 goto out;
219
220 err = -ENOMEM;
221 name = inode_name(ino, 0);
222 if (name == NULL)
223 goto out;
224
225 if (file_type(name, NULL, NULL) == OS_TYPE_SYMLINK) {
226 name = follow_link(name);
227 if (IS_ERR(name)) {
228 err = PTR_ERR(name);
229 goto out;
230 }
231 }
232
233 err = read_name(ino, name);
234 kfree(name);
235 out:
236 return err;
237}
238
239static struct inode *hostfs_iget(struct super_block *sb) 195static struct inode *hostfs_iget(struct super_block *sb)
240{ 196{
241 struct inode *inode; 197 struct inode *inode = new_inode(sb);
242 long ret;
243
244 inode = iget_locked(sb, 0);
245 if (!inode) 198 if (!inode)
246 return ERR_PTR(-ENOMEM); 199 return ERR_PTR(-ENOMEM);
247 if (inode->i_state & I_NEW) {
248 ret = hostfs_read_inode(inode);
249 if (ret < 0) {
250 iget_failed(inode);
251 return ERR_PTR(ret);
252 }
253 unlock_new_inode(inode);
254 }
255 return inode; 200 return inode;
256} 201}
257 202
@@ -269,7 +214,7 @@ int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf)
269 long long f_files; 214 long long f_files;
270 long long f_ffree; 215 long long f_ffree;
271 216
272 err = do_statfs(HOSTFS_I(dentry->d_sb->s_root->d_inode)->host_filename, 217 err = do_statfs(dentry->d_sb->s_fs_info,
273 &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files, 218 &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files,
274 &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid), 219 &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid),
275 &sf->f_namelen, sf->f_spare); 220 &sf->f_namelen, sf->f_spare);
@@ -288,47 +233,32 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb)
288{ 233{
289 struct hostfs_inode_info *hi; 234 struct hostfs_inode_info *hi;
290 235
291 hi = kmalloc(sizeof(*hi), GFP_KERNEL); 236 hi = kzalloc(sizeof(*hi), GFP_KERNEL);
292 if (hi == NULL) 237 if (hi == NULL)
293 return NULL; 238 return NULL;
294 239 hi->fd = -1;
295 *hi = ((struct hostfs_inode_info) { .host_filename = NULL,
296 .fd = -1,
297 .mode = 0 });
298 inode_init_once(&hi->vfs_inode); 240 inode_init_once(&hi->vfs_inode);
299 return &hi->vfs_inode; 241 return &hi->vfs_inode;
300} 242}
301 243
302static void hostfs_delete_inode(struct inode *inode) 244static void hostfs_evict_inode(struct inode *inode)
303{ 245{
304 truncate_inode_pages(&inode->i_data, 0); 246 truncate_inode_pages(&inode->i_data, 0);
247 end_writeback(inode);
305 if (HOSTFS_I(inode)->fd != -1) { 248 if (HOSTFS_I(inode)->fd != -1) {
306 close_file(&HOSTFS_I(inode)->fd); 249 close_file(&HOSTFS_I(inode)->fd);
307 HOSTFS_I(inode)->fd = -1; 250 HOSTFS_I(inode)->fd = -1;
308 } 251 }
309 clear_inode(inode);
310} 252}
311 253
312static void hostfs_destroy_inode(struct inode *inode) 254static void hostfs_destroy_inode(struct inode *inode)
313{ 255{
314 kfree(HOSTFS_I(inode)->host_filename);
315
316 /*
317 * XXX: This should not happen, probably. The check is here for
318 * additional safety.
319 */
320 if (HOSTFS_I(inode)->fd != -1) {
321 close_file(&HOSTFS_I(inode)->fd);
322 printk(KERN_DEBUG "Closing host fd in .destroy_inode\n");
323 }
324
325 kfree(HOSTFS_I(inode)); 256 kfree(HOSTFS_I(inode));
326} 257}
327 258
328static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs) 259static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
329{ 260{
330 struct inode *root = vfs->mnt_sb->s_root->d_inode; 261 const char *root_path = vfs->mnt_sb->s_fs_info;
331 const char *root_path = HOSTFS_I(root)->host_filename;
332 size_t offset = strlen(root_ino) + 1; 262 size_t offset = strlen(root_ino) + 1;
333 263
334 if (strlen(root_path) > offset) 264 if (strlen(root_path) > offset)
@@ -339,9 +269,8 @@ static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
339 269
340static const struct super_operations hostfs_sbops = { 270static const struct super_operations hostfs_sbops = {
341 .alloc_inode = hostfs_alloc_inode, 271 .alloc_inode = hostfs_alloc_inode,
342 .drop_inode = generic_delete_inode,
343 .delete_inode = hostfs_delete_inode,
344 .destroy_inode = hostfs_destroy_inode, 272 .destroy_inode = hostfs_destroy_inode,
273 .evict_inode = hostfs_evict_inode,
345 .statfs = hostfs_statfs, 274 .statfs = hostfs_statfs,
346 .show_options = hostfs_show_options, 275 .show_options = hostfs_show_options,
347}; 276};
@@ -353,11 +282,11 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir)
353 unsigned long long next, ino; 282 unsigned long long next, ino;
354 int error, len; 283 int error, len;
355 284
356 name = dentry_name(file->f_path.dentry, 0); 285 name = dentry_name(file->f_path.dentry);
357 if (name == NULL) 286 if (name == NULL)
358 return -ENOMEM; 287 return -ENOMEM;
359 dir = open_dir(name, &error); 288 dir = open_dir(name, &error);
360 kfree(name); 289 __putname(name);
361 if (dir == NULL) 290 if (dir == NULL)
362 return -error; 291 return -error;
363 next = file->f_pos; 292 next = file->f_pos;
@@ -373,40 +302,59 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir)
373 302
374int hostfs_file_open(struct inode *ino, struct file *file) 303int hostfs_file_open(struct inode *ino, struct file *file)
375{ 304{
305 static DEFINE_MUTEX(open_mutex);
376 char *name; 306 char *name;
377 fmode_t mode = 0; 307 fmode_t mode = 0;
308 int err;
378 int r = 0, w = 0, fd; 309 int r = 0, w = 0, fd;
379 310
380 mode = file->f_mode & (FMODE_READ | FMODE_WRITE); 311 mode = file->f_mode & (FMODE_READ | FMODE_WRITE);
381 if ((mode & HOSTFS_I(ino)->mode) == mode) 312 if ((mode & HOSTFS_I(ino)->mode) == mode)
382 return 0; 313 return 0;
383 314
384 /* 315 mode |= HOSTFS_I(ino)->mode;
385 * The file may already have been opened, but with the wrong access,
386 * so this resets things and reopens the file with the new access.
387 */
388 if (HOSTFS_I(ino)->fd != -1) {
389 close_file(&HOSTFS_I(ino)->fd);
390 HOSTFS_I(ino)->fd = -1;
391 }
392 316
393 HOSTFS_I(ino)->mode |= mode; 317retry:
394 if (HOSTFS_I(ino)->mode & FMODE_READ) 318 if (mode & FMODE_READ)
395 r = 1; 319 r = 1;
396 if (HOSTFS_I(ino)->mode & FMODE_WRITE) 320 if (mode & FMODE_WRITE)
397 w = 1; 321 w = 1;
398 if (w) 322 if (w)
399 r = 1; 323 r = 1;
400 324
401 name = dentry_name(file->f_path.dentry, 0); 325 name = dentry_name(file->f_path.dentry);
402 if (name == NULL) 326 if (name == NULL)
403 return -ENOMEM; 327 return -ENOMEM;
404 328
405 fd = open_file(name, r, w, append); 329 fd = open_file(name, r, w, append);
406 kfree(name); 330 __putname(name);
407 if (fd < 0) 331 if (fd < 0)
408 return fd; 332 return fd;
409 FILE_HOSTFS_I(file)->fd = fd; 333
334 mutex_lock(&open_mutex);
335 /* somebody else had handled it first? */
336 if ((mode & HOSTFS_I(ino)->mode) == mode) {
337 mutex_unlock(&open_mutex);
338 return 0;
339 }
340 if ((mode | HOSTFS_I(ino)->mode) != mode) {
341 mode |= HOSTFS_I(ino)->mode;
342 mutex_unlock(&open_mutex);
343 close_file(&fd);
344 goto retry;
345 }
346 if (HOSTFS_I(ino)->fd == -1) {
347 HOSTFS_I(ino)->fd = fd;
348 } else {
349 err = replace_file(fd, HOSTFS_I(ino)->fd);
350 close_file(&fd);
351 if (err < 0) {
352 mutex_unlock(&open_mutex);
353 return err;
354 }
355 }
356 HOSTFS_I(ino)->mode = mode;
357 mutex_unlock(&open_mutex);
410 358
411 return 0; 359 return 0;
412} 360}
@@ -544,54 +492,50 @@ static const struct address_space_operations hostfs_aops = {
544 .write_end = hostfs_write_end, 492 .write_end = hostfs_write_end,
545}; 493};
546 494
547static int init_inode(struct inode *inode, struct dentry *dentry) 495static int read_name(struct inode *ino, char *name)
548{ 496{
549 char *name; 497 dev_t rdev;
550 int type, err = -ENOMEM; 498 struct hostfs_stat st;
551 int maj, min; 499 int err = stat_file(name, &st, -1);
552 dev_t rdev = 0; 500 if (err)
501 return err;
553 502
554 if (dentry) { 503 /* Reencode maj and min with the kernel encoding.*/
555 name = dentry_name(dentry, 0); 504 rdev = MKDEV(st.maj, st.min);
556 if (name == NULL)
557 goto out;
558 type = file_type(name, &maj, &min);
559 /* Reencode maj and min with the kernel encoding.*/
560 rdev = MKDEV(maj, min);
561 kfree(name);
562 }
563 else type = OS_TYPE_DIR;
564 505
565 err = 0; 506 switch (st.mode & S_IFMT) {
566 if (type == OS_TYPE_SYMLINK) 507 case S_IFLNK:
567 inode->i_op = &page_symlink_inode_operations; 508 ino->i_op = &hostfs_link_iops;
568 else if (type == OS_TYPE_DIR)
569 inode->i_op = &hostfs_dir_iops;
570 else inode->i_op = &hostfs_iops;
571
572 if (type == OS_TYPE_DIR) inode->i_fop = &hostfs_dir_fops;
573 else inode->i_fop = &hostfs_file_fops;
574
575 if (type == OS_TYPE_SYMLINK)
576 inode->i_mapping->a_ops = &hostfs_link_aops;
577 else inode->i_mapping->a_ops = &hostfs_aops;
578
579 switch (type) {
580 case OS_TYPE_CHARDEV:
581 init_special_inode(inode, S_IFCHR, rdev);
582 break; 509 break;
583 case OS_TYPE_BLOCKDEV: 510 case S_IFDIR:
584 init_special_inode(inode, S_IFBLK, rdev); 511 ino->i_op = &hostfs_dir_iops;
512 ino->i_fop = &hostfs_dir_fops;
585 break; 513 break;
586 case OS_TYPE_FIFO: 514 case S_IFCHR:
587 init_special_inode(inode, S_IFIFO, 0); 515 case S_IFBLK:
516 case S_IFIFO:
517 case S_IFSOCK:
518 init_special_inode(ino, st.mode & S_IFMT, rdev);
519 ino->i_op = &hostfs_iops;
588 break; 520 break;
589 case OS_TYPE_SOCK: 521
590 init_special_inode(inode, S_IFSOCK, 0); 522 default:
591 break; 523 ino->i_op = &hostfs_iops;
592 } 524 ino->i_fop = &hostfs_file_fops;
593 out: 525 ino->i_mapping->a_ops = &hostfs_aops;
594 return err; 526 }
527
528 ino->i_ino = st.ino;
529 ino->i_mode = st.mode;
530 ino->i_nlink = st.nlink;
531 ino->i_uid = st.uid;
532 ino->i_gid = st.gid;
533 ino->i_atime = st.atime;
534 ino->i_mtime = st.mtime;
535 ino->i_ctime = st.ctime;
536 ino->i_size = st.size;
537 ino->i_blocks = st.blocks;
538 return 0;
595} 539}
596 540
597int hostfs_create(struct inode *dir, struct dentry *dentry, int mode, 541int hostfs_create(struct inode *dir, struct dentry *dentry, int mode,
@@ -607,12 +551,8 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, int mode,
607 goto out; 551 goto out;
608 } 552 }
609 553
610 error = init_inode(inode, dentry);
611 if (error)
612 goto out_put;
613
614 error = -ENOMEM; 554 error = -ENOMEM;
615 name = dentry_name(dentry, 0); 555 name = dentry_name(dentry);
616 if (name == NULL) 556 if (name == NULL)
617 goto out_put; 557 goto out_put;
618 558
@@ -622,9 +562,10 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, int mode,
622 mode & S_IROTH, mode & S_IWOTH, mode & S_IXOTH); 562 mode & S_IROTH, mode & S_IWOTH, mode & S_IXOTH);
623 if (fd < 0) 563 if (fd < 0)
624 error = fd; 564 error = fd;
625 else error = read_name(inode, name); 565 else
566 error = read_name(inode, name);
626 567
627 kfree(name); 568 __putname(name);
628 if (error) 569 if (error)
629 goto out_put; 570 goto out_put;
630 571
@@ -652,17 +593,14 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
652 goto out; 593 goto out;
653 } 594 }
654 595
655 err = init_inode(inode, dentry);
656 if (err)
657 goto out_put;
658
659 err = -ENOMEM; 596 err = -ENOMEM;
660 name = dentry_name(dentry, 0); 597 name = dentry_name(dentry);
661 if (name == NULL) 598 if (name == NULL)
662 goto out_put; 599 goto out_put;
663 600
664 err = read_name(inode, name); 601 err = read_name(inode, name);
665 kfree(name); 602
603 __putname(name);
666 if (err == -ENOENT) { 604 if (err == -ENOENT) {
667 iput(inode); 605 iput(inode);
668 inode = NULL; 606 inode = NULL;
@@ -680,36 +618,21 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
680 return ERR_PTR(err); 618 return ERR_PTR(err);
681} 619}
682 620
683static char *inode_dentry_name(struct inode *ino, struct dentry *dentry)
684{
685 char *file;
686 int len;
687
688 file = inode_name(ino, dentry->d_name.len + 1);
689 if (file == NULL)
690 return NULL;
691 strcat(file, "/");
692 len = strlen(file);
693 strncat(file, dentry->d_name.name, dentry->d_name.len);
694 file[len + dentry->d_name.len] = '\0';
695 return file;
696}
697
698int hostfs_link(struct dentry *to, struct inode *ino, struct dentry *from) 621int hostfs_link(struct dentry *to, struct inode *ino, struct dentry *from)
699{ 622{
700 char *from_name, *to_name; 623 char *from_name, *to_name;
701 int err; 624 int err;
702 625
703 if ((from_name = inode_dentry_name(ino, from)) == NULL) 626 if ((from_name = dentry_name(from)) == NULL)
704 return -ENOMEM; 627 return -ENOMEM;
705 to_name = dentry_name(to, 0); 628 to_name = dentry_name(to);
706 if (to_name == NULL) { 629 if (to_name == NULL) {
707 kfree(from_name); 630 __putname(from_name);
708 return -ENOMEM; 631 return -ENOMEM;
709 } 632 }
710 err = link_file(to_name, from_name); 633 err = link_file(to_name, from_name);
711 kfree(from_name); 634 __putname(from_name);
712 kfree(to_name); 635 __putname(to_name);
713 return err; 636 return err;
714} 637}
715 638
@@ -718,13 +641,14 @@ int hostfs_unlink(struct inode *ino, struct dentry *dentry)
718 char *file; 641 char *file;
719 int err; 642 int err;
720 643
721 if ((file = inode_dentry_name(ino, dentry)) == NULL)
722 return -ENOMEM;
723 if (append) 644 if (append)
724 return -EPERM; 645 return -EPERM;
725 646
647 if ((file = dentry_name(dentry)) == NULL)
648 return -ENOMEM;
649
726 err = unlink_file(file); 650 err = unlink_file(file);
727 kfree(file); 651 __putname(file);
728 return err; 652 return err;
729} 653}
730 654
@@ -733,10 +657,10 @@ int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to)
733 char *file; 657 char *file;
734 int err; 658 int err;
735 659
736 if ((file = inode_dentry_name(ino, dentry)) == NULL) 660 if ((file = dentry_name(dentry)) == NULL)
737 return -ENOMEM; 661 return -ENOMEM;
738 err = make_symlink(file, to); 662 err = make_symlink(file, to);
739 kfree(file); 663 __putname(file);
740 return err; 664 return err;
741} 665}
742 666
@@ -745,10 +669,10 @@ int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode)
745 char *file; 669 char *file;
746 int err; 670 int err;
747 671
748 if ((file = inode_dentry_name(ino, dentry)) == NULL) 672 if ((file = dentry_name(dentry)) == NULL)
749 return -ENOMEM; 673 return -ENOMEM;
750 err = do_mkdir(file, mode); 674 err = do_mkdir(file, mode);
751 kfree(file); 675 __putname(file);
752 return err; 676 return err;
753} 677}
754 678
@@ -757,10 +681,10 @@ int hostfs_rmdir(struct inode *ino, struct dentry *dentry)
757 char *file; 681 char *file;
758 int err; 682 int err;
759 683
760 if ((file = inode_dentry_name(ino, dentry)) == NULL) 684 if ((file = dentry_name(dentry)) == NULL)
761 return -ENOMEM; 685 return -ENOMEM;
762 err = do_rmdir(file); 686 err = do_rmdir(file);
763 kfree(file); 687 __putname(file);
764 return err; 688 return err;
765} 689}
766 690
@@ -776,22 +700,20 @@ int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
776 goto out; 700 goto out;
777 } 701 }
778 702
779 err = init_inode(inode, dentry);
780 if (err)
781 goto out_put;
782
783 err = -ENOMEM; 703 err = -ENOMEM;
784 name = dentry_name(dentry, 0); 704 name = dentry_name(dentry);
785 if (name == NULL) 705 if (name == NULL)
786 goto out_put; 706 goto out_put;
787 707
788 init_special_inode(inode, mode, dev); 708 init_special_inode(inode, mode, dev);
789 err = do_mknod(name, mode, MAJOR(dev), MINOR(dev)); 709 err = do_mknod(name, mode, MAJOR(dev), MINOR(dev));
790 if (err) 710 if (err)
791 goto out_free; 711 goto out_free;
792 712
793 err = read_name(inode, name); 713 err = read_name(inode, name);
794 kfree(name); 714 __putname(name);
715 if (err)
716 goto out_put;
795 if (err) 717 if (err)
796 goto out_put; 718 goto out_put;
797 719
@@ -799,7 +721,7 @@ int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
799 return 0; 721 return 0;
800 722
801 out_free: 723 out_free:
802 kfree(name); 724 __putname(name);
803 out_put: 725 out_put:
804 iput(inode); 726 iput(inode);
805 out: 727 out:
@@ -812,15 +734,15 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from,
812 char *from_name, *to_name; 734 char *from_name, *to_name;
813 int err; 735 int err;
814 736
815 if ((from_name = inode_dentry_name(from_ino, from)) == NULL) 737 if ((from_name = dentry_name(from)) == NULL)
816 return -ENOMEM; 738 return -ENOMEM;
817 if ((to_name = inode_dentry_name(to_ino, to)) == NULL) { 739 if ((to_name = dentry_name(to)) == NULL) {
818 kfree(from_name); 740 __putname(from_name);
819 return -ENOMEM; 741 return -ENOMEM;
820 } 742 }
821 err = rename_file(from_name, to_name); 743 err = rename_file(from_name, to_name);
822 kfree(from_name); 744 __putname(from_name);
823 kfree(to_name); 745 __putname(to_name);
824 return err; 746 return err;
825} 747}
826 748
@@ -832,7 +754,7 @@ int hostfs_permission(struct inode *ino, int desired)
832 if (desired & MAY_READ) r = 1; 754 if (desired & MAY_READ) r = 1;
833 if (desired & MAY_WRITE) w = 1; 755 if (desired & MAY_WRITE) w = 1;
834 if (desired & MAY_EXEC) x = 1; 756 if (desired & MAY_EXEC) x = 1;
835 name = inode_name(ino, 0); 757 name = inode_name(ino);
836 if (name == NULL) 758 if (name == NULL)
837 return -ENOMEM; 759 return -ENOMEM;
838 760
@@ -841,7 +763,7 @@ int hostfs_permission(struct inode *ino, int desired)
841 err = 0; 763 err = 0;
842 else 764 else
843 err = access_file(name, r, w, x); 765 err = access_file(name, r, w, x);
844 kfree(name); 766 __putname(name);
845 if (!err) 767 if (!err)
846 err = generic_permission(ino, desired, NULL); 768 err = generic_permission(ino, desired, NULL);
847 return err; 769 return err;
@@ -849,13 +771,14 @@ int hostfs_permission(struct inode *ino, int desired)
849 771
850int hostfs_setattr(struct dentry *dentry, struct iattr *attr) 772int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
851{ 773{
774 struct inode *inode = dentry->d_inode;
852 struct hostfs_iattr attrs; 775 struct hostfs_iattr attrs;
853 char *name; 776 char *name;
854 int err; 777 int err;
855 778
856 int fd = HOSTFS_I(dentry->d_inode)->fd; 779 int fd = HOSTFS_I(inode)->fd;
857 780
858 err = inode_change_ok(dentry->d_inode, attr); 781 err = inode_change_ok(inode, attr);
859 if (err) 782 if (err)
860 return err; 783 return err;
861 784
@@ -897,15 +820,26 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
897 if (attr->ia_valid & ATTR_MTIME_SET) { 820 if (attr->ia_valid & ATTR_MTIME_SET) {
898 attrs.ia_valid |= HOSTFS_ATTR_MTIME_SET; 821 attrs.ia_valid |= HOSTFS_ATTR_MTIME_SET;
899 } 822 }
900 name = dentry_name(dentry, 0); 823 name = dentry_name(dentry);
901 if (name == NULL) 824 if (name == NULL)
902 return -ENOMEM; 825 return -ENOMEM;
903 err = set_attr(name, &attrs, fd); 826 err = set_attr(name, &attrs, fd);
904 kfree(name); 827 __putname(name);
905 if (err) 828 if (err)
906 return err; 829 return err;
907 830
908 return inode_setattr(dentry->d_inode, attr); 831 if ((attr->ia_valid & ATTR_SIZE) &&
832 attr->ia_size != i_size_read(inode)) {
833 int error;
834
835 error = vmtruncate(inode, attr->ia_size);
836 if (error)
837 return error;
838 }
839
840 setattr_copy(inode, attr);
841 mark_inode_dirty(inode);
842 return 0;
909} 843}
910 844
911static const struct inode_operations hostfs_iops = { 845static const struct inode_operations hostfs_iops = {
@@ -935,32 +869,41 @@ static const struct inode_operations hostfs_dir_iops = {
935 .setattr = hostfs_setattr, 869 .setattr = hostfs_setattr,
936}; 870};
937 871
938int hostfs_link_readpage(struct file *file, struct page *page) 872static void *hostfs_follow_link(struct dentry *dentry, struct nameidata *nd)
939{ 873{
940 char *buffer, *name; 874 char *link = __getname();
941 int err; 875 if (link) {
942 876 char *path = dentry_name(dentry);
943 buffer = kmap(page); 877 int err = -ENOMEM;
944 name = inode_name(page->mapping->host, 0); 878 if (path) {
945 if (name == NULL) 879 err = hostfs_do_readlink(path, link, PATH_MAX);
946 return -ENOMEM; 880 if (err == PATH_MAX)
947 err = hostfs_do_readlink(name, buffer, PAGE_CACHE_SIZE); 881 err = -E2BIG;
948 kfree(name); 882 __putname(path);
949 if (err == PAGE_CACHE_SIZE) 883 }
950 err = -E2BIG; 884 if (err < 0) {
951 else if (err > 0) { 885 __putname(link);
952 flush_dcache_page(page); 886 link = ERR_PTR(err);
953 SetPageUptodate(page); 887 }
954 if (PageError(page)) ClearPageError(page); 888 } else {
955 err = 0; 889 link = ERR_PTR(-ENOMEM);
956 } 890 }
957 kunmap(page); 891
958 unlock_page(page); 892 nd_set_link(nd, link);
959 return err; 893 return NULL;
960} 894}
961 895
962static const struct address_space_operations hostfs_link_aops = { 896static void hostfs_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
963 .readpage = hostfs_link_readpage, 897{
898 char *s = nd_get_link(nd);
899 if (!IS_ERR(s))
900 __putname(s);
901}
902
903static const struct inode_operations hostfs_link_iops = {
904 .readlink = generic_readlink,
905 .follow_link = hostfs_follow_link,
906 .put_link = hostfs_put_link,
964}; 907};
965 908
966static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) 909static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
@@ -980,49 +923,41 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
980 req_root = ""; 923 req_root = "";
981 924
982 err = -ENOMEM; 925 err = -ENOMEM;
983 host_root_path = kmalloc(strlen(root_ino) + 1 926 sb->s_fs_info = host_root_path =
984 + strlen(req_root) + 1, GFP_KERNEL); 927 kmalloc(strlen(root_ino) + strlen(req_root) + 2, GFP_KERNEL);
985 if (host_root_path == NULL) 928 if (host_root_path == NULL)
986 goto out; 929 goto out;
987 930
988 sprintf(host_root_path, "%s/%s", root_ino, req_root); 931 sprintf(host_root_path, "%s/%s", root_ino, req_root);
989 932
990 root_inode = hostfs_iget(sb); 933 root_inode = new_inode(sb);
991 if (IS_ERR(root_inode)) { 934 if (!root_inode)
992 err = PTR_ERR(root_inode); 935 goto out;
993 goto out_free;
994 }
995 936
996 err = init_inode(root_inode, NULL); 937 err = read_name(root_inode, host_root_path);
997 if (err) 938 if (err)
998 goto out_put; 939 goto out_put;
999 940
1000 HOSTFS_I(root_inode)->host_filename = host_root_path; 941 if (S_ISLNK(root_inode->i_mode)) {
1001 /* 942 char *name = follow_link(host_root_path);
1002 * Avoid that in the error path, iput(root_inode) frees again 943 if (IS_ERR(name))
1003 * host_root_path through hostfs_destroy_inode! 944 err = PTR_ERR(name);
1004 */ 945 else
1005 host_root_path = NULL; 946 err = read_name(root_inode, name);
947 kfree(name);
948 if (err)
949 goto out_put;
950 }
1006 951
1007 err = -ENOMEM; 952 err = -ENOMEM;
1008 sb->s_root = d_alloc_root(root_inode); 953 sb->s_root = d_alloc_root(root_inode);
1009 if (sb->s_root == NULL) 954 if (sb->s_root == NULL)
1010 goto out_put; 955 goto out_put;
1011 956
1012 err = hostfs_read_inode(root_inode);
1013 if (err) {
1014 /* No iput in this case because the dput does that for us */
1015 dput(sb->s_root);
1016 sb->s_root = NULL;
1017 goto out;
1018 }
1019
1020 return 0; 957 return 0;
1021 958
1022out_put: 959out_put:
1023 iput(root_inode); 960 iput(root_inode);
1024out_free:
1025 kfree(host_root_path);
1026out: 961out:
1027 return err; 962 return err;
1028} 963}
@@ -1034,11 +969,17 @@ static int hostfs_read_sb(struct file_system_type *type,
1034 return get_sb_nodev(type, flags, data, hostfs_fill_sb_common, mnt); 969 return get_sb_nodev(type, flags, data, hostfs_fill_sb_common, mnt);
1035} 970}
1036 971
972static void hostfs_kill_sb(struct super_block *s)
973{
974 kill_anon_super(s);
975 kfree(s->s_fs_info);
976}
977
1037static struct file_system_type hostfs_type = { 978static struct file_system_type hostfs_type = {
1038 .owner = THIS_MODULE, 979 .owner = THIS_MODULE,
1039 .name = "hostfs", 980 .name = "hostfs",
1040 .get_sb = hostfs_read_sb, 981 .get_sb = hostfs_read_sb,
1041 .kill_sb = kill_anon_super, 982 .kill_sb = hostfs_kill_sb,
1042 .fs_flags = 0, 983 .fs_flags = 0,
1043}; 984};
1044 985
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index b79424f93282..6777aa06ce2c 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -19,11 +19,27 @@
19#include "user.h" 19#include "user.h"
20#include <utime.h> 20#include <utime.h>
21 21
22int stat_file(const char *path, unsigned long long *inode_out, int *mode_out, 22static void stat64_to_hostfs(const struct stat64 *buf, struct hostfs_stat *p)
23 int *nlink_out, int *uid_out, int *gid_out, 23{
24 unsigned long long *size_out, struct timespec *atime_out, 24 p->ino = buf->st_ino;
25 struct timespec *mtime_out, struct timespec *ctime_out, 25 p->mode = buf->st_mode;
26 int *blksize_out, unsigned long long *blocks_out, int fd) 26 p->nlink = buf->st_nlink;
27 p->uid = buf->st_uid;
28 p->gid = buf->st_gid;
29 p->size = buf->st_size;
30 p->atime.tv_sec = buf->st_atime;
31 p->atime.tv_nsec = 0;
32 p->ctime.tv_sec = buf->st_ctime;
33 p->ctime.tv_nsec = 0;
34 p->mtime.tv_sec = buf->st_mtime;
35 p->mtime.tv_nsec = 0;
36 p->blksize = buf->st_blksize;
37 p->blocks = buf->st_blocks;
38 p->maj = os_major(buf->st_rdev);
39 p->min = os_minor(buf->st_rdev);
40}
41
42int stat_file(const char *path, struct hostfs_stat *p, int fd)
27{ 43{
28 struct stat64 buf; 44 struct stat64 buf;
29 45
@@ -33,68 +49,10 @@ int stat_file(const char *path, unsigned long long *inode_out, int *mode_out,
33 } else if (lstat64(path, &buf) < 0) { 49 } else if (lstat64(path, &buf) < 0) {
34 return -errno; 50 return -errno;
35 } 51 }
36 52 stat64_to_hostfs(&buf, p);
37 if (inode_out != NULL)
38 *inode_out = buf.st_ino;
39 if (mode_out != NULL)
40 *mode_out = buf.st_mode;
41 if (nlink_out != NULL)
42 *nlink_out = buf.st_nlink;
43 if (uid_out != NULL)
44 *uid_out = buf.st_uid;
45 if (gid_out != NULL)
46 *gid_out = buf.st_gid;
47 if (size_out != NULL)
48 *size_out = buf.st_size;
49 if (atime_out != NULL) {
50 atime_out->tv_sec = buf.st_atime;
51 atime_out->tv_nsec = 0;
52 }
53 if (mtime_out != NULL) {
54 mtime_out->tv_sec = buf.st_mtime;
55 mtime_out->tv_nsec = 0;
56 }
57 if (ctime_out != NULL) {
58 ctime_out->tv_sec = buf.st_ctime;
59 ctime_out->tv_nsec = 0;
60 }
61 if (blksize_out != NULL)
62 *blksize_out = buf.st_blksize;
63 if (blocks_out != NULL)
64 *blocks_out = buf.st_blocks;
65 return 0; 53 return 0;
66} 54}
67 55
68int file_type(const char *path, int *maj, int *min)
69{
70 struct stat64 buf;
71
72 if (lstat64(path, &buf) < 0)
73 return -errno;
74 /*
75 * We cannot pass rdev as is because glibc and the kernel disagree
76 * about its definition.
77 */
78 if (maj != NULL)
79 *maj = major(buf.st_rdev);
80 if (min != NULL)
81 *min = minor(buf.st_rdev);
82
83 if (S_ISDIR(buf.st_mode))
84 return OS_TYPE_DIR;
85 else if (S_ISLNK(buf.st_mode))
86 return OS_TYPE_SYMLINK;
87 else if (S_ISCHR(buf.st_mode))
88 return OS_TYPE_CHARDEV;
89 else if (S_ISBLK(buf.st_mode))
90 return OS_TYPE_BLOCKDEV;
91 else if (S_ISFIFO(buf.st_mode))
92 return OS_TYPE_FIFO;
93 else if (S_ISSOCK(buf.st_mode))
94 return OS_TYPE_SOCK;
95 else return OS_TYPE_FILE;
96}
97
98int access_file(char *path, int r, int w, int x) 56int access_file(char *path, int r, int w, int x)
99{ 57{
100 int mode = 0; 58 int mode = 0;
@@ -202,6 +160,11 @@ int fsync_file(int fd, int datasync)
202 return 0; 160 return 0;
203} 161}
204 162
163int replace_file(int oldfd, int fd)
164{
165 return dup2(oldfd, fd);
166}
167
205void close_file(void *stream) 168void close_file(void *stream)
206{ 169{
207 close(*((int *) stream)); 170 close(*((int *) stream));
@@ -235,8 +198,8 @@ int file_create(char *name, int ur, int uw, int ux, int gr,
235 198
236int set_attr(const char *file, struct hostfs_iattr *attrs, int fd) 199int set_attr(const char *file, struct hostfs_iattr *attrs, int fd)
237{ 200{
201 struct hostfs_stat st;
238 struct timeval times[2]; 202 struct timeval times[2];
239 struct timespec atime_ts, mtime_ts;
240 int err, ma; 203 int err, ma;
241 204
242 if (attrs->ia_valid & HOSTFS_ATTR_MODE) { 205 if (attrs->ia_valid & HOSTFS_ATTR_MODE) {
@@ -279,15 +242,14 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd)
279 */ 242 */
280 ma = (HOSTFS_ATTR_ATIME_SET | HOSTFS_ATTR_MTIME_SET); 243 ma = (HOSTFS_ATTR_ATIME_SET | HOSTFS_ATTR_MTIME_SET);
281 if (attrs->ia_valid & ma) { 244 if (attrs->ia_valid & ma) {
282 err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL, 245 err = stat_file(file, &st, fd);
283 &atime_ts, &mtime_ts, NULL, NULL, NULL, fd);
284 if (err != 0) 246 if (err != 0)
285 return err; 247 return err;
286 248
287 times[0].tv_sec = atime_ts.tv_sec; 249 times[0].tv_sec = st.atime.tv_sec;
288 times[0].tv_usec = atime_ts.tv_nsec / 1000; 250 times[0].tv_usec = st.atime.tv_nsec / 1000;
289 times[1].tv_sec = mtime_ts.tv_sec; 251 times[1].tv_sec = st.mtime.tv_sec;
290 times[1].tv_usec = mtime_ts.tv_nsec / 1000; 252 times[1].tv_usec = st.mtime.tv_nsec / 1000;
291 253
292 if (attrs->ia_valid & HOSTFS_ATTR_ATIME_SET) { 254 if (attrs->ia_valid & HOSTFS_ATTR_ATIME_SET) {
293 times[0].tv_sec = attrs->ia_atime.tv_sec; 255 times[0].tv_sec = attrs->ia_atime.tv_sec;
@@ -308,9 +270,9 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd)
308 270
309 /* Note: ctime is not handled */ 271 /* Note: ctime is not handled */
310 if (attrs->ia_valid & (HOSTFS_ATTR_ATIME | HOSTFS_ATTR_MTIME)) { 272 if (attrs->ia_valid & (HOSTFS_ATTR_ATIME | HOSTFS_ATTR_MTIME)) {
311 err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL, 273 err = stat_file(file, &st, fd);
312 &attrs->ia_atime, &attrs->ia_mtime, NULL, 274 attrs->ia_atime = st.atime;
313 NULL, NULL, fd); 275 attrs->ia_mtime = st.mtime;
314 if (err != 0) 276 if (err != 0)
315 return err; 277 return err;
316 } 278 }
@@ -361,7 +323,7 @@ int do_mknod(const char *file, int mode, unsigned int major, unsigned int minor)
361{ 323{
362 int err; 324 int err;
363 325
364 err = mknod(file, mode, makedev(major, minor)); 326 err = mknod(file, mode, os_makedev(major, minor));
365 if (err) 327 if (err)
366 return -errno; 328 return -errno;
367 return 0; 329 return 0;
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index a9ae9bfa752f..c0340887c7ea 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -97,10 +97,19 @@ static int hpfs_write_begin(struct file *file, struct address_space *mapping,
97 loff_t pos, unsigned len, unsigned flags, 97 loff_t pos, unsigned len, unsigned flags,
98 struct page **pagep, void **fsdata) 98 struct page **pagep, void **fsdata)
99{ 99{
100 int ret;
101
100 *pagep = NULL; 102 *pagep = NULL;
101 return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 103 ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
102 hpfs_get_block, 104 hpfs_get_block,
103 &hpfs_i(mapping->host)->mmu_private); 105 &hpfs_i(mapping->host)->mmu_private);
106 if (unlikely(ret)) {
107 loff_t isize = mapping->host->i_size;
108 if (pos + len > isize)
109 vmtruncate(mapping->host, isize);
110 }
111
112 return ret;
104} 113}
105 114
106static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block) 115static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 75f9d4324851..b59eac0232a0 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -281,7 +281,7 @@ void hpfs_write_inode(struct inode *);
281void hpfs_write_inode_nolock(struct inode *); 281void hpfs_write_inode_nolock(struct inode *);
282int hpfs_setattr(struct dentry *, struct iattr *); 282int hpfs_setattr(struct dentry *, struct iattr *);
283void hpfs_write_if_changed(struct inode *); 283void hpfs_write_if_changed(struct inode *);
284void hpfs_delete_inode(struct inode *); 284void hpfs_evict_inode(struct inode *);
285 285
286/* map.c */ 286/* map.c */
287 287
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 1042a9bc97f3..56f0da1cfd10 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -277,9 +277,15 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
277 if (error) 277 if (error)
278 goto out_unlock; 278 goto out_unlock;
279 279
280 error = inode_setattr(inode, attr); 280 if ((attr->ia_valid & ATTR_SIZE) &&
281 if (error) 281 attr->ia_size != i_size_read(inode)) {
282 goto out_unlock; 282 error = vmtruncate(inode, attr->ia_size);
283 if (error)
284 goto out_unlock;
285 }
286
287 setattr_copy(inode, attr);
288 mark_inode_dirty(inode);
283 289
284 hpfs_write_inode(inode); 290 hpfs_write_inode(inode);
285 291
@@ -296,11 +302,13 @@ void hpfs_write_if_changed(struct inode *inode)
296 hpfs_write_inode(inode); 302 hpfs_write_inode(inode);
297} 303}
298 304
299void hpfs_delete_inode(struct inode *inode) 305void hpfs_evict_inode(struct inode *inode)
300{ 306{
301 truncate_inode_pages(&inode->i_data, 0); 307 truncate_inode_pages(&inode->i_data, 0);
302 lock_kernel(); 308 end_writeback(inode);
303 hpfs_remove_fnode(inode->i_sb, inode->i_ino); 309 if (!inode->i_nlink) {
304 unlock_kernel(); 310 lock_kernel();
305 clear_inode(inode); 311 hpfs_remove_fnode(inode->i_sb, inode->i_ino);
312 unlock_kernel();
313 }
306} 314}
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index aa53842c599c..2607010be2fe 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -450,7 +450,7 @@ static const struct super_operations hpfs_sops =
450{ 450{
451 .alloc_inode = hpfs_alloc_inode, 451 .alloc_inode = hpfs_alloc_inode,
452 .destroy_inode = hpfs_destroy_inode, 452 .destroy_inode = hpfs_destroy_inode,
453 .delete_inode = hpfs_delete_inode, 453 .evict_inode = hpfs_evict_inode,
454 .put_super = hpfs_put_super, 454 .put_super = hpfs_put_super,
455 .statfs = hpfs_statfs, 455 .statfs = hpfs_statfs,
456 .remount_fs = hpfs_remount_fs, 456 .remount_fs = hpfs_remount_fs,
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 826c3f9d29ac..7b027720d820 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -15,6 +15,7 @@
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/statfs.h> 16#include <linux/statfs.h>
17#include <linux/types.h> 17#include <linux/types.h>
18#include <linux/pid_namespace.h>
18#include <asm/uaccess.h> 19#include <asm/uaccess.h>
19#include "os.h" 20#include "os.h"
20 21
@@ -623,12 +624,11 @@ static struct inode *hppfs_alloc_inode(struct super_block *sb)
623 return &hi->vfs_inode; 624 return &hi->vfs_inode;
624} 625}
625 626
626void hppfs_delete_inode(struct inode *ino) 627void hppfs_evict_inode(struct inode *ino)
627{ 628{
629 end_writeback(ino);
628 dput(HPPFS_I(ino)->proc_dentry); 630 dput(HPPFS_I(ino)->proc_dentry);
629 mntput(ino->i_sb->s_fs_info); 631 mntput(ino->i_sb->s_fs_info);
630
631 clear_inode(ino);
632} 632}
633 633
634static void hppfs_destroy_inode(struct inode *inode) 634static void hppfs_destroy_inode(struct inode *inode)
@@ -639,7 +639,7 @@ static void hppfs_destroy_inode(struct inode *inode)
639static const struct super_operations hppfs_sbops = { 639static const struct super_operations hppfs_sbops = {
640 .alloc_inode = hppfs_alloc_inode, 640 .alloc_inode = hppfs_alloc_inode,
641 .destroy_inode = hppfs_destroy_inode, 641 .destroy_inode = hppfs_destroy_inode,
642 .delete_inode = hppfs_delete_inode, 642 .evict_inode = hppfs_evict_inode,
643 .statfs = hppfs_statfs, 643 .statfs = hppfs_statfs,
644}; 644};
645 645
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a4e9a7ec3691..6e5bd42f3860 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -371,27 +371,10 @@ static void truncate_hugepages(struct inode *inode, loff_t lstart)
371 hugetlb_unreserve_pages(inode, start, freed); 371 hugetlb_unreserve_pages(inode, start, freed);
372} 372}
373 373
374static void hugetlbfs_delete_inode(struct inode *inode) 374static void hugetlbfs_evict_inode(struct inode *inode)
375{ 375{
376 truncate_hugepages(inode, 0); 376 truncate_hugepages(inode, 0);
377 clear_inode(inode); 377 end_writeback(inode);
378}
379
380static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock)
381{
382 if (generic_detach_inode(inode)) {
383 truncate_hugepages(inode, 0);
384 clear_inode(inode);
385 destroy_inode(inode);
386 }
387}
388
389static void hugetlbfs_drop_inode(struct inode *inode)
390{
391 if (!inode->i_nlink)
392 generic_delete_inode(inode);
393 else
394 hugetlbfs_forget_inode(inode);
395} 378}
396 379
397static inline void 380static inline void
@@ -448,19 +431,20 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
448 431
449 error = inode_change_ok(inode, attr); 432 error = inode_change_ok(inode, attr);
450 if (error) 433 if (error)
451 goto out; 434 return error;
452 435
453 if (ia_valid & ATTR_SIZE) { 436 if (ia_valid & ATTR_SIZE) {
454 error = -EINVAL; 437 error = -EINVAL;
455 if (!(attr->ia_size & ~huge_page_mask(h))) 438 if (attr->ia_size & ~huge_page_mask(h))
456 error = hugetlb_vmtruncate(inode, attr->ia_size); 439 return -EINVAL;
440 error = hugetlb_vmtruncate(inode, attr->ia_size);
457 if (error) 441 if (error)
458 goto out; 442 return error;
459 attr->ia_valid &= ~ATTR_SIZE;
460 } 443 }
461 error = inode_setattr(inode, attr); 444
462out: 445 setattr_copy(inode, attr);
463 return error; 446 mark_inode_dirty(inode);
447 return 0;
464} 448}
465 449
466static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, 450static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
@@ -712,9 +696,8 @@ static const struct inode_operations hugetlbfs_inode_operations = {
712static const struct super_operations hugetlbfs_ops = { 696static const struct super_operations hugetlbfs_ops = {
713 .alloc_inode = hugetlbfs_alloc_inode, 697 .alloc_inode = hugetlbfs_alloc_inode,
714 .destroy_inode = hugetlbfs_destroy_inode, 698 .destroy_inode = hugetlbfs_destroy_inode,
699 .evict_inode = hugetlbfs_evict_inode,
715 .statfs = hugetlbfs_statfs, 700 .statfs = hugetlbfs_statfs,
716 .delete_inode = hugetlbfs_delete_inode,
717 .drop_inode = hugetlbfs_drop_inode,
718 .put_super = hugetlbfs_put_super, 701 .put_super = hugetlbfs_put_super,
719 .show_options = generic_show_options, 702 .show_options = generic_show_options,
720}; 703};
diff --git a/fs/inode.c b/fs/inode.c
index a2da778467bb..86464332e590 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -289,32 +289,34 @@ void __iget(struct inode *inode)
289 inodes_stat.nr_unused--; 289 inodes_stat.nr_unused--;
290} 290}
291 291
292/** 292void end_writeback(struct inode *inode)
293 * clear_inode - clear an inode
294 * @inode: inode to clear
295 *
296 * This is called by the filesystem to tell us
297 * that the inode is no longer useful. We just
298 * terminate it with extreme prejudice.
299 */
300void clear_inode(struct inode *inode)
301{ 293{
302 might_sleep(); 294 might_sleep();
303 invalidate_inode_buffers(inode);
304
305 BUG_ON(inode->i_data.nrpages); 295 BUG_ON(inode->i_data.nrpages);
296 BUG_ON(!list_empty(&inode->i_data.private_list));
306 BUG_ON(!(inode->i_state & I_FREEING)); 297 BUG_ON(!(inode->i_state & I_FREEING));
307 BUG_ON(inode->i_state & I_CLEAR); 298 BUG_ON(inode->i_state & I_CLEAR);
308 inode_sync_wait(inode); 299 inode_sync_wait(inode);
309 if (inode->i_sb->s_op->clear_inode) 300 inode->i_state = I_FREEING | I_CLEAR;
310 inode->i_sb->s_op->clear_inode(inode); 301}
302EXPORT_SYMBOL(end_writeback);
303
304static void evict(struct inode *inode)
305{
306 const struct super_operations *op = inode->i_sb->s_op;
307
308 if (op->evict_inode) {
309 op->evict_inode(inode);
310 } else {
311 if (inode->i_data.nrpages)
312 truncate_inode_pages(&inode->i_data, 0);
313 end_writeback(inode);
314 }
311 if (S_ISBLK(inode->i_mode) && inode->i_bdev) 315 if (S_ISBLK(inode->i_mode) && inode->i_bdev)
312 bd_forget(inode); 316 bd_forget(inode);
313 if (S_ISCHR(inode->i_mode) && inode->i_cdev) 317 if (S_ISCHR(inode->i_mode) && inode->i_cdev)
314 cd_forget(inode); 318 cd_forget(inode);
315 inode->i_state = I_CLEAR;
316} 319}
317EXPORT_SYMBOL(clear_inode);
318 320
319/* 321/*
320 * dispose_list - dispose of the contents of a local list 322 * dispose_list - dispose of the contents of a local list
@@ -333,9 +335,7 @@ static void dispose_list(struct list_head *head)
333 inode = list_first_entry(head, struct inode, i_list); 335 inode = list_first_entry(head, struct inode, i_list);
334 list_del(&inode->i_list); 336 list_del(&inode->i_list);
335 337
336 if (inode->i_data.nrpages) 338 evict(inode);
337 truncate_inode_pages(&inode->i_data, 0);
338 clear_inode(inode);
339 339
340 spin_lock(&inode_lock); 340 spin_lock(&inode_lock);
341 hlist_del_init(&inode->i_hash); 341 hlist_del_init(&inode->i_hash);
@@ -547,7 +547,7 @@ repeat:
547 continue; 547 continue;
548 if (!test(inode, data)) 548 if (!test(inode, data))
549 continue; 549 continue;
550 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) { 550 if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
551 __wait_on_freeing_inode(inode); 551 __wait_on_freeing_inode(inode);
552 goto repeat; 552 goto repeat;
553 } 553 }
@@ -572,7 +572,7 @@ repeat:
572 continue; 572 continue;
573 if (inode->i_sb != sb) 573 if (inode->i_sb != sb)
574 continue; 574 continue;
575 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) { 575 if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
576 __wait_on_freeing_inode(inode); 576 __wait_on_freeing_inode(inode);
577 goto repeat; 577 goto repeat;
578 } 578 }
@@ -834,7 +834,7 @@ EXPORT_SYMBOL(iunique);
834struct inode *igrab(struct inode *inode) 834struct inode *igrab(struct inode *inode)
835{ 835{
836 spin_lock(&inode_lock); 836 spin_lock(&inode_lock);
837 if (!(inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))) 837 if (!(inode->i_state & (I_FREEING|I_WILL_FREE)))
838 __iget(inode); 838 __iget(inode);
839 else 839 else
840 /* 840 /*
@@ -1083,7 +1083,7 @@ int insert_inode_locked(struct inode *inode)
1083 continue; 1083 continue;
1084 if (old->i_sb != sb) 1084 if (old->i_sb != sb)
1085 continue; 1085 continue;
1086 if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) 1086 if (old->i_state & (I_FREEING|I_WILL_FREE))
1087 continue; 1087 continue;
1088 break; 1088 break;
1089 } 1089 }
@@ -1122,7 +1122,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
1122 continue; 1122 continue;
1123 if (!test(old, data)) 1123 if (!test(old, data))
1124 continue; 1124 continue;
1125 if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) 1125 if (old->i_state & (I_FREEING|I_WILL_FREE))
1126 continue; 1126 continue;
1127 break; 1127 break;
1128 } 1128 }
@@ -1174,69 +1174,51 @@ void remove_inode_hash(struct inode *inode)
1174} 1174}
1175EXPORT_SYMBOL(remove_inode_hash); 1175EXPORT_SYMBOL(remove_inode_hash);
1176 1176
1177int generic_delete_inode(struct inode *inode)
1178{
1179 return 1;
1180}
1181EXPORT_SYMBOL(generic_delete_inode);
1182
1177/* 1183/*
1178 * Tell the filesystem that this inode is no longer of any interest and should 1184 * Normal UNIX filesystem behaviour: delete the
1179 * be completely destroyed. 1185 * inode when the usage count drops to zero, and
1180 * 1186 * i_nlink is zero.
1181 * We leave the inode in the inode hash table until *after* the filesystem's
1182 * ->delete_inode completes. This ensures that an iget (such as nfsd might
1183 * instigate) will always find up-to-date information either in the hash or on
1184 * disk.
1185 *
1186 * I_FREEING is set so that no-one will take a new reference to the inode while
1187 * it is being deleted.
1188 */ 1187 */
1189void generic_delete_inode(struct inode *inode) 1188int generic_drop_inode(struct inode *inode)
1190{ 1189{
1191 const struct super_operations *op = inode->i_sb->s_op; 1190 return !inode->i_nlink || hlist_unhashed(&inode->i_hash);
1192
1193 list_del_init(&inode->i_list);
1194 list_del_init(&inode->i_sb_list);
1195 WARN_ON(inode->i_state & I_NEW);
1196 inode->i_state |= I_FREEING;
1197 inodes_stat.nr_inodes--;
1198 spin_unlock(&inode_lock);
1199
1200 if (op->delete_inode) {
1201 void (*delete)(struct inode *) = op->delete_inode;
1202 /* Filesystems implementing their own
1203 * s_op->delete_inode are required to call
1204 * truncate_inode_pages and clear_inode()
1205 * internally */
1206 delete(inode);
1207 } else {
1208 truncate_inode_pages(&inode->i_data, 0);
1209 clear_inode(inode);
1210 }
1211 spin_lock(&inode_lock);
1212 hlist_del_init(&inode->i_hash);
1213 spin_unlock(&inode_lock);
1214 wake_up_inode(inode);
1215 BUG_ON(inode->i_state != I_CLEAR);
1216 destroy_inode(inode);
1217} 1191}
1218EXPORT_SYMBOL(generic_delete_inode); 1192EXPORT_SYMBOL_GPL(generic_drop_inode);
1219 1193
1220/** 1194/*
1221 * generic_detach_inode - remove inode from inode lists 1195 * Called when we're dropping the last reference
1222 * @inode: inode to remove 1196 * to an inode.
1223 *
1224 * Remove inode from inode lists, write it if it's dirty. This is just an
1225 * internal VFS helper exported for hugetlbfs. Do not use!
1226 * 1197 *
1227 * Returns 1 if inode should be completely destroyed. 1198 * Call the FS "drop_inode()" function, defaulting to
1199 * the legacy UNIX filesystem behaviour. If it tells
1200 * us to evict inode, do so. Otherwise, retain inode
1201 * in cache if fs is alive, sync and evict if fs is
1202 * shutting down.
1228 */ 1203 */
1229int generic_detach_inode(struct inode *inode) 1204static void iput_final(struct inode *inode)
1230{ 1205{
1231 struct super_block *sb = inode->i_sb; 1206 struct super_block *sb = inode->i_sb;
1207 const struct super_operations *op = inode->i_sb->s_op;
1208 int drop;
1232 1209
1233 if (!hlist_unhashed(&inode->i_hash)) { 1210 if (op && op->drop_inode)
1211 drop = op->drop_inode(inode);
1212 else
1213 drop = generic_drop_inode(inode);
1214
1215 if (!drop) {
1234 if (!(inode->i_state & (I_DIRTY|I_SYNC))) 1216 if (!(inode->i_state & (I_DIRTY|I_SYNC)))
1235 list_move(&inode->i_list, &inode_unused); 1217 list_move(&inode->i_list, &inode_unused);
1236 inodes_stat.nr_unused++; 1218 inodes_stat.nr_unused++;
1237 if (sb->s_flags & MS_ACTIVE) { 1219 if (sb->s_flags & MS_ACTIVE) {
1238 spin_unlock(&inode_lock); 1220 spin_unlock(&inode_lock);
1239 return 0; 1221 return;
1240 } 1222 }
1241 WARN_ON(inode->i_state & I_NEW); 1223 WARN_ON(inode->i_state & I_NEW);
1242 inode->i_state |= I_WILL_FREE; 1224 inode->i_state |= I_WILL_FREE;
@@ -1254,56 +1236,15 @@ int generic_detach_inode(struct inode *inode)
1254 inode->i_state |= I_FREEING; 1236 inode->i_state |= I_FREEING;
1255 inodes_stat.nr_inodes--; 1237 inodes_stat.nr_inodes--;
1256 spin_unlock(&inode_lock); 1238 spin_unlock(&inode_lock);
1257 return 1; 1239 evict(inode);
1258} 1240 spin_lock(&inode_lock);
1259EXPORT_SYMBOL_GPL(generic_detach_inode); 1241 hlist_del_init(&inode->i_hash);
1260 1242 spin_unlock(&inode_lock);
1261static void generic_forget_inode(struct inode *inode)
1262{
1263 if (!generic_detach_inode(inode))
1264 return;
1265 if (inode->i_data.nrpages)
1266 truncate_inode_pages(&inode->i_data, 0);
1267 clear_inode(inode);
1268 wake_up_inode(inode); 1243 wake_up_inode(inode);
1244 BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
1269 destroy_inode(inode); 1245 destroy_inode(inode);
1270} 1246}
1271 1247
1272/*
1273 * Normal UNIX filesystem behaviour: delete the
1274 * inode when the usage count drops to zero, and
1275 * i_nlink is zero.
1276 */
1277void generic_drop_inode(struct inode *inode)
1278{
1279 if (!inode->i_nlink)
1280 generic_delete_inode(inode);
1281 else
1282 generic_forget_inode(inode);
1283}
1284EXPORT_SYMBOL_GPL(generic_drop_inode);
1285
1286/*
1287 * Called when we're dropping the last reference
1288 * to an inode.
1289 *
1290 * Call the FS "drop()" function, defaulting to
1291 * the legacy UNIX filesystem behaviour..
1292 *
1293 * NOTE! NOTE! NOTE! We're called with the inode lock
1294 * held, and the drop function is supposed to release
1295 * the lock!
1296 */
1297static inline void iput_final(struct inode *inode)
1298{
1299 const struct super_operations *op = inode->i_sb->s_op;
1300 void (*drop)(struct inode *) = generic_drop_inode;
1301
1302 if (op && op->drop_inode)
1303 drop = op->drop_inode;
1304 drop(inode);
1305}
1306
1307/** 1248/**
1308 * iput - put an inode 1249 * iput - put an inode
1309 * @inode: inode to put 1250 * @inode: inode to put
@@ -1316,7 +1257,7 @@ static inline void iput_final(struct inode *inode)
1316void iput(struct inode *inode) 1257void iput(struct inode *inode)
1317{ 1258{
1318 if (inode) { 1259 if (inode) {
1319 BUG_ON(inode->i_state == I_CLEAR); 1260 BUG_ON(inode->i_state & I_CLEAR);
1320 1261
1321 if (atomic_dec_and_lock(&inode->i_count, &inode_lock)) 1262 if (atomic_dec_and_lock(&inode->i_count, &inode_lock))
1322 iput_final(inode); 1263 iput_final(inode);
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 93d1e47647bd..f19ce94693d8 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1281,13 +1281,9 @@ int journal_check_used_features (journal_t *journal, unsigned long compat,
1281int journal_check_available_features (journal_t *journal, unsigned long compat, 1281int journal_check_available_features (journal_t *journal, unsigned long compat,
1282 unsigned long ro, unsigned long incompat) 1282 unsigned long ro, unsigned long incompat)
1283{ 1283{
1284 journal_superblock_t *sb;
1285
1286 if (!compat && !ro && !incompat) 1284 if (!compat && !ro && !incompat)
1287 return 1; 1285 return 1;
1288 1286
1289 sb = journal->j_superblock;
1290
1291 /* We can support any known requested features iff the 1287 /* We can support any known requested features iff the
1292 * superblock is in version 2. Otherwise we fail to support any 1288 * superblock is in version 2. Otherwise we fail to support any
1293 * extended sb features. */ 1289 * extended sb features. */
@@ -1481,7 +1477,6 @@ int journal_flush(journal_t *journal)
1481 1477
1482int journal_wipe(journal_t *journal, int write) 1478int journal_wipe(journal_t *journal, int write)
1483{ 1479{
1484 journal_superblock_t *sb;
1485 int err = 0; 1480 int err = 0;
1486 1481
1487 J_ASSERT (!(journal->j_flags & JFS_LOADED)); 1482 J_ASSERT (!(journal->j_flags & JFS_LOADED));
@@ -1490,8 +1485,6 @@ int journal_wipe(journal_t *journal, int write)
1490 if (err) 1485 if (err)
1491 return err; 1486 return err;
1492 1487
1493 sb = journal->j_superblock;
1494
1495 if (!journal->j_tail) 1488 if (!journal->j_tail)
1496 goto no_recovery; 1489 goto no_recovery;
1497 1490
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index 54c9bc9e1b17..81051dafebf5 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -283,12 +283,9 @@ int journal_recover(journal_t *journal)
283int journal_skip_recovery(journal_t *journal) 283int journal_skip_recovery(journal_t *journal)
284{ 284{
285 int err; 285 int err;
286 journal_superblock_t * sb;
287
288 struct recovery_info info; 286 struct recovery_info info;
289 287
290 memset (&info, 0, sizeof(info)); 288 memset (&info, 0, sizeof(info));
291 sb = journal->j_superblock;
292 289
293 err = do_one_pass(journal, &info, PASS_SCAN); 290 err = do_one_pass(journal, &info, PASS_SCAN);
294 291
@@ -297,7 +294,8 @@ int journal_skip_recovery(journal_t *journal)
297 ++journal->j_transaction_sequence; 294 ++journal->j_transaction_sequence;
298 } else { 295 } else {
299#ifdef CONFIG_JBD_DEBUG 296#ifdef CONFIG_JBD_DEBUG
300 int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence); 297 int dropped = info.end_transaction -
298 be32_to_cpu(journal->j_superblock->s_sequence);
301#endif 299#endif
302 jbd_debug(1, 300 jbd_debug(1,
303 "JBD: ignoring %d transaction%s from the journal.\n", 301 "JBD: ignoring %d transaction%s from the journal.\n",
@@ -321,11 +319,6 @@ static int do_one_pass(journal_t *journal,
321 unsigned int sequence; 319 unsigned int sequence;
322 int blocktype; 320 int blocktype;
323 321
324 /* Precompute the maximum metadata descriptors in a descriptor block */
325 int MAX_BLOCKS_PER_DESC;
326 MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t))
327 / sizeof(journal_block_tag_t));
328
329 /* 322 /*
330 * First thing is to establish what we expect to find in the log 323 * First thing is to establish what we expect to find in the log
331 * (in terms of transaction IDs), and where (in terms of log 324 * (in terms of transaction IDs), and where (in terms of log
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 076d1cc44f95..1c23a0f4e8a3 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -118,13 +118,13 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
118void __jbd2_log_wait_for_space(journal_t *journal) 118void __jbd2_log_wait_for_space(journal_t *journal)
119{ 119{
120 int nblocks, space_left; 120 int nblocks, space_left;
121 assert_spin_locked(&journal->j_state_lock); 121 /* assert_spin_locked(&journal->j_state_lock); */
122 122
123 nblocks = jbd_space_needed(journal); 123 nblocks = jbd_space_needed(journal);
124 while (__jbd2_log_space_left(journal) < nblocks) { 124 while (__jbd2_log_space_left(journal) < nblocks) {
125 if (journal->j_flags & JBD2_ABORT) 125 if (journal->j_flags & JBD2_ABORT)
126 return; 126 return;
127 spin_unlock(&journal->j_state_lock); 127 write_unlock(&journal->j_state_lock);
128 mutex_lock(&journal->j_checkpoint_mutex); 128 mutex_lock(&journal->j_checkpoint_mutex);
129 129
130 /* 130 /*
@@ -138,7 +138,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
138 * filesystem, so abort the journal and leave a stack 138 * filesystem, so abort the journal and leave a stack
139 * trace for forensic evidence. 139 * trace for forensic evidence.
140 */ 140 */
141 spin_lock(&journal->j_state_lock); 141 write_lock(&journal->j_state_lock);
142 spin_lock(&journal->j_list_lock); 142 spin_lock(&journal->j_list_lock);
143 nblocks = jbd_space_needed(journal); 143 nblocks = jbd_space_needed(journal);
144 space_left = __jbd2_log_space_left(journal); 144 space_left = __jbd2_log_space_left(journal);
@@ -149,7 +149,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
149 if (journal->j_committing_transaction) 149 if (journal->j_committing_transaction)
150 tid = journal->j_committing_transaction->t_tid; 150 tid = journal->j_committing_transaction->t_tid;
151 spin_unlock(&journal->j_list_lock); 151 spin_unlock(&journal->j_list_lock);
152 spin_unlock(&journal->j_state_lock); 152 write_unlock(&journal->j_state_lock);
153 if (chkpt) { 153 if (chkpt) {
154 jbd2_log_do_checkpoint(journal); 154 jbd2_log_do_checkpoint(journal);
155 } else if (jbd2_cleanup_journal_tail(journal) == 0) { 155 } else if (jbd2_cleanup_journal_tail(journal) == 0) {
@@ -167,7 +167,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
167 WARN_ON(1); 167 WARN_ON(1);
168 jbd2_journal_abort(journal, 0); 168 jbd2_journal_abort(journal, 0);
169 } 169 }
170 spin_lock(&journal->j_state_lock); 170 write_lock(&journal->j_state_lock);
171 } else { 171 } else {
172 spin_unlock(&journal->j_list_lock); 172 spin_unlock(&journal->j_list_lock);
173 } 173 }
@@ -474,7 +474,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
474 * next transaction ID we will write, and where it will 474 * next transaction ID we will write, and where it will
475 * start. */ 475 * start. */
476 476
477 spin_lock(&journal->j_state_lock); 477 write_lock(&journal->j_state_lock);
478 spin_lock(&journal->j_list_lock); 478 spin_lock(&journal->j_list_lock);
479 transaction = journal->j_checkpoint_transactions; 479 transaction = journal->j_checkpoint_transactions;
480 if (transaction) { 480 if (transaction) {
@@ -496,7 +496,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
496 /* If the oldest pinned transaction is at the tail of the log 496 /* If the oldest pinned transaction is at the tail of the log
497 already then there's not much we can do right now. */ 497 already then there's not much we can do right now. */
498 if (journal->j_tail_sequence == first_tid) { 498 if (journal->j_tail_sequence == first_tid) {
499 spin_unlock(&journal->j_state_lock); 499 write_unlock(&journal->j_state_lock);
500 return 1; 500 return 1;
501 } 501 }
502 502
@@ -516,7 +516,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
516 journal->j_free += freed; 516 journal->j_free += freed;
517 journal->j_tail_sequence = first_tid; 517 journal->j_tail_sequence = first_tid;
518 journal->j_tail = blocknr; 518 journal->j_tail = blocknr;
519 spin_unlock(&journal->j_state_lock); 519 write_unlock(&journal->j_state_lock);
520 520
521 /* 521 /*
522 * If there is an external journal, we need to make sure that 522 * If there is an external journal, we need to make sure that
@@ -775,7 +775,7 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
775 J_ASSERT(transaction->t_log_list == NULL); 775 J_ASSERT(transaction->t_log_list == NULL);
776 J_ASSERT(transaction->t_checkpoint_list == NULL); 776 J_ASSERT(transaction->t_checkpoint_list == NULL);
777 J_ASSERT(transaction->t_checkpoint_io_list == NULL); 777 J_ASSERT(transaction->t_checkpoint_io_list == NULL);
778 J_ASSERT(transaction->t_updates == 0); 778 J_ASSERT(atomic_read(&transaction->t_updates) == 0);
779 J_ASSERT(journal->j_committing_transaction != transaction); 779 J_ASSERT(journal->j_committing_transaction != transaction);
780 J_ASSERT(journal->j_running_transaction != transaction); 780 J_ASSERT(journal->j_running_transaction != transaction);
781 781
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 75716d3d2be0..f52e5e8049f1 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -150,11 +150,11 @@ static int journal_submit_commit_record(journal_t *journal,
150 */ 150 */
151 if (ret == -EOPNOTSUPP && barrier_done) { 151 if (ret == -EOPNOTSUPP && barrier_done) {
152 printk(KERN_WARNING 152 printk(KERN_WARNING
153 "JBD: barrier-based sync failed on %s - " 153 "JBD2: Disabling barriers on %s, "
154 "disabling barriers\n", journal->j_devname); 154 "not supported by device\n", journal->j_devname);
155 spin_lock(&journal->j_state_lock); 155 write_lock(&journal->j_state_lock);
156 journal->j_flags &= ~JBD2_BARRIER; 156 journal->j_flags &= ~JBD2_BARRIER;
157 spin_unlock(&journal->j_state_lock); 157 write_unlock(&journal->j_state_lock);
158 158
159 /* And try again, without the barrier */ 159 /* And try again, without the barrier */
160 lock_buffer(bh); 160 lock_buffer(bh);
@@ -180,11 +180,11 @@ retry:
180 wait_on_buffer(bh); 180 wait_on_buffer(bh);
181 if (buffer_eopnotsupp(bh) && (journal->j_flags & JBD2_BARRIER)) { 181 if (buffer_eopnotsupp(bh) && (journal->j_flags & JBD2_BARRIER)) {
182 printk(KERN_WARNING 182 printk(KERN_WARNING
183 "JBD2: wait_on_commit_record: sync failed on %s - " 183 "JBD2: %s: disabling barries on %s - not supported "
184 "disabling barriers\n", journal->j_devname); 184 "by device\n", __func__, journal->j_devname);
185 spin_lock(&journal->j_state_lock); 185 write_lock(&journal->j_state_lock);
186 journal->j_flags &= ~JBD2_BARRIER; 186 journal->j_flags &= ~JBD2_BARRIER;
187 spin_unlock(&journal->j_state_lock); 187 write_unlock(&journal->j_state_lock);
188 188
189 lock_buffer(bh); 189 lock_buffer(bh);
190 clear_buffer_dirty(bh); 190 clear_buffer_dirty(bh);
@@ -400,7 +400,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
400 jbd_debug(1, "JBD: starting commit of transaction %d\n", 400 jbd_debug(1, "JBD: starting commit of transaction %d\n",
401 commit_transaction->t_tid); 401 commit_transaction->t_tid);
402 402
403 spin_lock(&journal->j_state_lock); 403 write_lock(&journal->j_state_lock);
404 commit_transaction->t_state = T_LOCKED; 404 commit_transaction->t_state = T_LOCKED;
405 405
406 /* 406 /*
@@ -417,23 +417,23 @@ void jbd2_journal_commit_transaction(journal_t *journal)
417 stats.run.rs_locked); 417 stats.run.rs_locked);
418 418
419 spin_lock(&commit_transaction->t_handle_lock); 419 spin_lock(&commit_transaction->t_handle_lock);
420 while (commit_transaction->t_updates) { 420 while (atomic_read(&commit_transaction->t_updates)) {
421 DEFINE_WAIT(wait); 421 DEFINE_WAIT(wait);
422 422
423 prepare_to_wait(&journal->j_wait_updates, &wait, 423 prepare_to_wait(&journal->j_wait_updates, &wait,
424 TASK_UNINTERRUPTIBLE); 424 TASK_UNINTERRUPTIBLE);
425 if (commit_transaction->t_updates) { 425 if (atomic_read(&commit_transaction->t_updates)) {
426 spin_unlock(&commit_transaction->t_handle_lock); 426 spin_unlock(&commit_transaction->t_handle_lock);
427 spin_unlock(&journal->j_state_lock); 427 write_unlock(&journal->j_state_lock);
428 schedule(); 428 schedule();
429 spin_lock(&journal->j_state_lock); 429 write_lock(&journal->j_state_lock);
430 spin_lock(&commit_transaction->t_handle_lock); 430 spin_lock(&commit_transaction->t_handle_lock);
431 } 431 }
432 finish_wait(&journal->j_wait_updates, &wait); 432 finish_wait(&journal->j_wait_updates, &wait);
433 } 433 }
434 spin_unlock(&commit_transaction->t_handle_lock); 434 spin_unlock(&commit_transaction->t_handle_lock);
435 435
436 J_ASSERT (commit_transaction->t_outstanding_credits <= 436 J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <=
437 journal->j_max_transaction_buffers); 437 journal->j_max_transaction_buffers);
438 438
439 /* 439 /*
@@ -497,7 +497,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
497 start_time = ktime_get(); 497 start_time = ktime_get();
498 commit_transaction->t_log_start = journal->j_head; 498 commit_transaction->t_log_start = journal->j_head;
499 wake_up(&journal->j_wait_transaction_locked); 499 wake_up(&journal->j_wait_transaction_locked);
500 spin_unlock(&journal->j_state_lock); 500 write_unlock(&journal->j_state_lock);
501 501
502 jbd_debug (3, "JBD: commit phase 2\n"); 502 jbd_debug (3, "JBD: commit phase 2\n");
503 503
@@ -519,19 +519,20 @@ void jbd2_journal_commit_transaction(journal_t *journal)
519 * transaction! Now comes the tricky part: we need to write out 519 * transaction! Now comes the tricky part: we need to write out
520 * metadata. Loop over the transaction's entire buffer list: 520 * metadata. Loop over the transaction's entire buffer list:
521 */ 521 */
522 spin_lock(&journal->j_state_lock); 522 write_lock(&journal->j_state_lock);
523 commit_transaction->t_state = T_COMMIT; 523 commit_transaction->t_state = T_COMMIT;
524 spin_unlock(&journal->j_state_lock); 524 write_unlock(&journal->j_state_lock);
525 525
526 trace_jbd2_commit_logging(journal, commit_transaction); 526 trace_jbd2_commit_logging(journal, commit_transaction);
527 stats.run.rs_logging = jiffies; 527 stats.run.rs_logging = jiffies;
528 stats.run.rs_flushing = jbd2_time_diff(stats.run.rs_flushing, 528 stats.run.rs_flushing = jbd2_time_diff(stats.run.rs_flushing,
529 stats.run.rs_logging); 529 stats.run.rs_logging);
530 stats.run.rs_blocks = commit_transaction->t_outstanding_credits; 530 stats.run.rs_blocks =
531 atomic_read(&commit_transaction->t_outstanding_credits);
531 stats.run.rs_blocks_logged = 0; 532 stats.run.rs_blocks_logged = 0;
532 533
533 J_ASSERT(commit_transaction->t_nr_buffers <= 534 J_ASSERT(commit_transaction->t_nr_buffers <=
534 commit_transaction->t_outstanding_credits); 535 atomic_read(&commit_transaction->t_outstanding_credits));
535 536
536 err = 0; 537 err = 0;
537 descriptor = NULL; 538 descriptor = NULL;
@@ -616,7 +617,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
616 * the free space in the log, but this counter is changed 617 * the free space in the log, but this counter is changed
617 * by jbd2_journal_next_log_block() also. 618 * by jbd2_journal_next_log_block() also.
618 */ 619 */
619 commit_transaction->t_outstanding_credits--; 620 atomic_dec(&commit_transaction->t_outstanding_credits);
620 621
621 /* Bump b_count to prevent truncate from stumbling over 622 /* Bump b_count to prevent truncate from stumbling over
622 the shadowed buffer! @@@ This can go if we ever get 623 the shadowed buffer! @@@ This can go if we ever get
@@ -977,7 +978,7 @@ restart_loop:
977 * __jbd2_journal_drop_transaction(). Otherwise we could race with 978 * __jbd2_journal_drop_transaction(). Otherwise we could race with
978 * other checkpointing code processing the transaction... 979 * other checkpointing code processing the transaction...
979 */ 980 */
980 spin_lock(&journal->j_state_lock); 981 write_lock(&journal->j_state_lock);
981 spin_lock(&journal->j_list_lock); 982 spin_lock(&journal->j_list_lock);
982 /* 983 /*
983 * Now recheck if some buffers did not get attached to the transaction 984 * Now recheck if some buffers did not get attached to the transaction
@@ -985,7 +986,7 @@ restart_loop:
985 */ 986 */
986 if (commit_transaction->t_forget) { 987 if (commit_transaction->t_forget) {
987 spin_unlock(&journal->j_list_lock); 988 spin_unlock(&journal->j_list_lock);
988 spin_unlock(&journal->j_state_lock); 989 write_unlock(&journal->j_state_lock);
989 goto restart_loop; 990 goto restart_loop;
990 } 991 }
991 992
@@ -1003,7 +1004,8 @@ restart_loop:
1003 * File the transaction statistics 1004 * File the transaction statistics
1004 */ 1005 */
1005 stats.ts_tid = commit_transaction->t_tid; 1006 stats.ts_tid = commit_transaction->t_tid;
1006 stats.run.rs_handle_count = commit_transaction->t_handle_count; 1007 stats.run.rs_handle_count =
1008 atomic_read(&commit_transaction->t_handle_count);
1007 trace_jbd2_run_stats(journal->j_fs_dev->bd_dev, 1009 trace_jbd2_run_stats(journal->j_fs_dev->bd_dev,
1008 commit_transaction->t_tid, &stats.run); 1010 commit_transaction->t_tid, &stats.run);
1009 1011
@@ -1037,7 +1039,7 @@ restart_loop:
1037 journal->j_average_commit_time*3) / 4; 1039 journal->j_average_commit_time*3) / 4;
1038 else 1040 else
1039 journal->j_average_commit_time = commit_time; 1041 journal->j_average_commit_time = commit_time;
1040 spin_unlock(&journal->j_state_lock); 1042 write_unlock(&journal->j_state_lock);
1041 1043
1042 if (commit_transaction->t_checkpoint_list == NULL && 1044 if (commit_transaction->t_checkpoint_list == NULL &&
1043 commit_transaction->t_checkpoint_io_list == NULL) { 1045 commit_transaction->t_checkpoint_io_list == NULL) {
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 036880895bfc..ad5866aaf0f9 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -41,6 +41,7 @@
41#include <linux/hash.h> 41#include <linux/hash.h>
42#include <linux/log2.h> 42#include <linux/log2.h>
43#include <linux/vmalloc.h> 43#include <linux/vmalloc.h>
44#include <linux/backing-dev.h>
44 45
45#define CREATE_TRACE_POINTS 46#define CREATE_TRACE_POINTS
46#include <trace/events/jbd2.h> 47#include <trace/events/jbd2.h>
@@ -48,8 +49,6 @@
48#include <asm/uaccess.h> 49#include <asm/uaccess.h>
49#include <asm/page.h> 50#include <asm/page.h>
50 51
51EXPORT_SYMBOL(jbd2_journal_start);
52EXPORT_SYMBOL(jbd2_journal_restart);
53EXPORT_SYMBOL(jbd2_journal_extend); 52EXPORT_SYMBOL(jbd2_journal_extend);
54EXPORT_SYMBOL(jbd2_journal_stop); 53EXPORT_SYMBOL(jbd2_journal_stop);
55EXPORT_SYMBOL(jbd2_journal_lock_updates); 54EXPORT_SYMBOL(jbd2_journal_lock_updates);
@@ -143,7 +142,7 @@ static int kjournald2(void *arg)
143 /* 142 /*
144 * And now, wait forever for commit wakeup events. 143 * And now, wait forever for commit wakeup events.
145 */ 144 */
146 spin_lock(&journal->j_state_lock); 145 write_lock(&journal->j_state_lock);
147 146
148loop: 147loop:
149 if (journal->j_flags & JBD2_UNMOUNT) 148 if (journal->j_flags & JBD2_UNMOUNT)
@@ -154,10 +153,10 @@ loop:
154 153
155 if (journal->j_commit_sequence != journal->j_commit_request) { 154 if (journal->j_commit_sequence != journal->j_commit_request) {
156 jbd_debug(1, "OK, requests differ\n"); 155 jbd_debug(1, "OK, requests differ\n");
157 spin_unlock(&journal->j_state_lock); 156 write_unlock(&journal->j_state_lock);
158 del_timer_sync(&journal->j_commit_timer); 157 del_timer_sync(&journal->j_commit_timer);
159 jbd2_journal_commit_transaction(journal); 158 jbd2_journal_commit_transaction(journal);
160 spin_lock(&journal->j_state_lock); 159 write_lock(&journal->j_state_lock);
161 goto loop; 160 goto loop;
162 } 161 }
163 162
@@ -169,9 +168,9 @@ loop:
169 * be already stopped. 168 * be already stopped.
170 */ 169 */
171 jbd_debug(1, "Now suspending kjournald2\n"); 170 jbd_debug(1, "Now suspending kjournald2\n");
172 spin_unlock(&journal->j_state_lock); 171 write_unlock(&journal->j_state_lock);
173 refrigerator(); 172 refrigerator();
174 spin_lock(&journal->j_state_lock); 173 write_lock(&journal->j_state_lock);
175 } else { 174 } else {
176 /* 175 /*
177 * We assume on resume that commits are already there, 176 * We assume on resume that commits are already there,
@@ -191,9 +190,9 @@ loop:
191 if (journal->j_flags & JBD2_UNMOUNT) 190 if (journal->j_flags & JBD2_UNMOUNT)
192 should_sleep = 0; 191 should_sleep = 0;
193 if (should_sleep) { 192 if (should_sleep) {
194 spin_unlock(&journal->j_state_lock); 193 write_unlock(&journal->j_state_lock);
195 schedule(); 194 schedule();
196 spin_lock(&journal->j_state_lock); 195 write_lock(&journal->j_state_lock);
197 } 196 }
198 finish_wait(&journal->j_wait_commit, &wait); 197 finish_wait(&journal->j_wait_commit, &wait);
199 } 198 }
@@ -211,7 +210,7 @@ loop:
211 goto loop; 210 goto loop;
212 211
213end_loop: 212end_loop:
214 spin_unlock(&journal->j_state_lock); 213 write_unlock(&journal->j_state_lock);
215 del_timer_sync(&journal->j_commit_timer); 214 del_timer_sync(&journal->j_commit_timer);
216 journal->j_task = NULL; 215 journal->j_task = NULL;
217 wake_up(&journal->j_wait_done_commit); 216 wake_up(&journal->j_wait_done_commit);
@@ -234,16 +233,16 @@ static int jbd2_journal_start_thread(journal_t *journal)
234 233
235static void journal_kill_thread(journal_t *journal) 234static void journal_kill_thread(journal_t *journal)
236{ 235{
237 spin_lock(&journal->j_state_lock); 236 write_lock(&journal->j_state_lock);
238 journal->j_flags |= JBD2_UNMOUNT; 237 journal->j_flags |= JBD2_UNMOUNT;
239 238
240 while (journal->j_task) { 239 while (journal->j_task) {
241 wake_up(&journal->j_wait_commit); 240 wake_up(&journal->j_wait_commit);
242 spin_unlock(&journal->j_state_lock); 241 write_unlock(&journal->j_state_lock);
243 wait_event(journal->j_wait_done_commit, journal->j_task == NULL); 242 wait_event(journal->j_wait_done_commit, journal->j_task == NULL);
244 spin_lock(&journal->j_state_lock); 243 write_lock(&journal->j_state_lock);
245 } 244 }
246 spin_unlock(&journal->j_state_lock); 245 write_unlock(&journal->j_state_lock);
247} 246}
248 247
249/* 248/*
@@ -310,7 +309,17 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
310 */ 309 */
311 J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in)); 310 J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
312 311
313 new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL); 312retry_alloc:
313 new_bh = alloc_buffer_head(GFP_NOFS);
314 if (!new_bh) {
315 /*
316 * Failure is not an option, but __GFP_NOFAIL is going
317 * away; so we retry ourselves here.
318 */
319 congestion_wait(BLK_RW_ASYNC, HZ/50);
320 goto retry_alloc;
321 }
322
314 /* keep subsequent assertions sane */ 323 /* keep subsequent assertions sane */
315 new_bh->b_state = 0; 324 new_bh->b_state = 0;
316 init_buffer(new_bh, NULL, NULL); 325 init_buffer(new_bh, NULL, NULL);
@@ -442,7 +451,7 @@ int __jbd2_log_space_left(journal_t *journal)
442{ 451{
443 int left = journal->j_free; 452 int left = journal->j_free;
444 453
445 assert_spin_locked(&journal->j_state_lock); 454 /* assert_spin_locked(&journal->j_state_lock); */
446 455
447 /* 456 /*
448 * Be pessimistic here about the number of those free blocks which 457 * Be pessimistic here about the number of those free blocks which
@@ -487,9 +496,9 @@ int jbd2_log_start_commit(journal_t *journal, tid_t tid)
487{ 496{
488 int ret; 497 int ret;
489 498
490 spin_lock(&journal->j_state_lock); 499 write_lock(&journal->j_state_lock);
491 ret = __jbd2_log_start_commit(journal, tid); 500 ret = __jbd2_log_start_commit(journal, tid);
492 spin_unlock(&journal->j_state_lock); 501 write_unlock(&journal->j_state_lock);
493 return ret; 502 return ret;
494} 503}
495 504
@@ -508,7 +517,7 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
508 transaction_t *transaction = NULL; 517 transaction_t *transaction = NULL;
509 tid_t tid; 518 tid_t tid;
510 519
511 spin_lock(&journal->j_state_lock); 520 read_lock(&journal->j_state_lock);
512 if (journal->j_running_transaction && !current->journal_info) { 521 if (journal->j_running_transaction && !current->journal_info) {
513 transaction = journal->j_running_transaction; 522 transaction = journal->j_running_transaction;
514 __jbd2_log_start_commit(journal, transaction->t_tid); 523 __jbd2_log_start_commit(journal, transaction->t_tid);
@@ -516,12 +525,12 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
516 transaction = journal->j_committing_transaction; 525 transaction = journal->j_committing_transaction;
517 526
518 if (!transaction) { 527 if (!transaction) {
519 spin_unlock(&journal->j_state_lock); 528 read_unlock(&journal->j_state_lock);
520 return 0; /* Nothing to retry */ 529 return 0; /* Nothing to retry */
521 } 530 }
522 531
523 tid = transaction->t_tid; 532 tid = transaction->t_tid;
524 spin_unlock(&journal->j_state_lock); 533 read_unlock(&journal->j_state_lock);
525 jbd2_log_wait_commit(journal, tid); 534 jbd2_log_wait_commit(journal, tid);
526 return 1; 535 return 1;
527} 536}
@@ -535,7 +544,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
535{ 544{
536 int ret = 0; 545 int ret = 0;
537 546
538 spin_lock(&journal->j_state_lock); 547 write_lock(&journal->j_state_lock);
539 if (journal->j_running_transaction) { 548 if (journal->j_running_transaction) {
540 tid_t tid = journal->j_running_transaction->t_tid; 549 tid_t tid = journal->j_running_transaction->t_tid;
541 550
@@ -554,7 +563,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
554 *ptid = journal->j_committing_transaction->t_tid; 563 *ptid = journal->j_committing_transaction->t_tid;
555 ret = 1; 564 ret = 1;
556 } 565 }
557 spin_unlock(&journal->j_state_lock); 566 write_unlock(&journal->j_state_lock);
558 return ret; 567 return ret;
559} 568}
560 569
@@ -566,26 +575,24 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
566{ 575{
567 int err = 0; 576 int err = 0;
568 577
578 read_lock(&journal->j_state_lock);
569#ifdef CONFIG_JBD2_DEBUG 579#ifdef CONFIG_JBD2_DEBUG
570 spin_lock(&journal->j_state_lock);
571 if (!tid_geq(journal->j_commit_request, tid)) { 580 if (!tid_geq(journal->j_commit_request, tid)) {
572 printk(KERN_EMERG 581 printk(KERN_EMERG
573 "%s: error: j_commit_request=%d, tid=%d\n", 582 "%s: error: j_commit_request=%d, tid=%d\n",
574 __func__, journal->j_commit_request, tid); 583 __func__, journal->j_commit_request, tid);
575 } 584 }
576 spin_unlock(&journal->j_state_lock);
577#endif 585#endif
578 spin_lock(&journal->j_state_lock);
579 while (tid_gt(tid, journal->j_commit_sequence)) { 586 while (tid_gt(tid, journal->j_commit_sequence)) {
580 jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n", 587 jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n",
581 tid, journal->j_commit_sequence); 588 tid, journal->j_commit_sequence);
582 wake_up(&journal->j_wait_commit); 589 wake_up(&journal->j_wait_commit);
583 spin_unlock(&journal->j_state_lock); 590 read_unlock(&journal->j_state_lock);
584 wait_event(journal->j_wait_done_commit, 591 wait_event(journal->j_wait_done_commit,
585 !tid_gt(tid, journal->j_commit_sequence)); 592 !tid_gt(tid, journal->j_commit_sequence));
586 spin_lock(&journal->j_state_lock); 593 read_lock(&journal->j_state_lock);
587 } 594 }
588 spin_unlock(&journal->j_state_lock); 595 read_unlock(&journal->j_state_lock);
589 596
590 if (unlikely(is_journal_aborted(journal))) { 597 if (unlikely(is_journal_aborted(journal))) {
591 printk(KERN_EMERG "journal commit I/O error\n"); 598 printk(KERN_EMERG "journal commit I/O error\n");
@@ -602,7 +609,7 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp)
602{ 609{
603 unsigned long blocknr; 610 unsigned long blocknr;
604 611
605 spin_lock(&journal->j_state_lock); 612 write_lock(&journal->j_state_lock);
606 J_ASSERT(journal->j_free > 1); 613 J_ASSERT(journal->j_free > 1);
607 614
608 blocknr = journal->j_head; 615 blocknr = journal->j_head;
@@ -610,7 +617,7 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp)
610 journal->j_free--; 617 journal->j_free--;
611 if (journal->j_head == journal->j_last) 618 if (journal->j_head == journal->j_last)
612 journal->j_head = journal->j_first; 619 journal->j_head = journal->j_first;
613 spin_unlock(&journal->j_state_lock); 620 write_unlock(&journal->j_state_lock);
614 return jbd2_journal_bmap(journal, blocknr, retp); 621 return jbd2_journal_bmap(journal, blocknr, retp);
615} 622}
616 623
@@ -830,7 +837,7 @@ static journal_t * journal_init_common (void)
830 mutex_init(&journal->j_checkpoint_mutex); 837 mutex_init(&journal->j_checkpoint_mutex);
831 spin_lock_init(&journal->j_revoke_lock); 838 spin_lock_init(&journal->j_revoke_lock);
832 spin_lock_init(&journal->j_list_lock); 839 spin_lock_init(&journal->j_list_lock);
833 spin_lock_init(&journal->j_state_lock); 840 rwlock_init(&journal->j_state_lock);
834 841
835 journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); 842 journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
836 journal->j_min_batch_time = 0; 843 journal->j_min_batch_time = 0;
@@ -1096,14 +1103,14 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
1096 set_buffer_uptodate(bh); 1103 set_buffer_uptodate(bh);
1097 } 1104 }
1098 1105
1099 spin_lock(&journal->j_state_lock); 1106 read_lock(&journal->j_state_lock);
1100 jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", 1107 jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n",
1101 journal->j_tail, journal->j_tail_sequence, journal->j_errno); 1108 journal->j_tail, journal->j_tail_sequence, journal->j_errno);
1102 1109
1103 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); 1110 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
1104 sb->s_start = cpu_to_be32(journal->j_tail); 1111 sb->s_start = cpu_to_be32(journal->j_tail);
1105 sb->s_errno = cpu_to_be32(journal->j_errno); 1112 sb->s_errno = cpu_to_be32(journal->j_errno);
1106 spin_unlock(&journal->j_state_lock); 1113 read_unlock(&journal->j_state_lock);
1107 1114
1108 BUFFER_TRACE(bh, "marking dirty"); 1115 BUFFER_TRACE(bh, "marking dirty");
1109 mark_buffer_dirty(bh); 1116 mark_buffer_dirty(bh);
@@ -1124,12 +1131,12 @@ out:
1124 * any future commit will have to be careful to update the 1131 * any future commit will have to be careful to update the
1125 * superblock again to re-record the true start of the log. */ 1132 * superblock again to re-record the true start of the log. */
1126 1133
1127 spin_lock(&journal->j_state_lock); 1134 write_lock(&journal->j_state_lock);
1128 if (sb->s_start) 1135 if (sb->s_start)
1129 journal->j_flags &= ~JBD2_FLUSHED; 1136 journal->j_flags &= ~JBD2_FLUSHED;
1130 else 1137 else
1131 journal->j_flags |= JBD2_FLUSHED; 1138 journal->j_flags |= JBD2_FLUSHED;
1132 spin_unlock(&journal->j_state_lock); 1139 write_unlock(&journal->j_state_lock);
1133} 1140}
1134 1141
1135/* 1142/*
@@ -1391,13 +1398,9 @@ int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat,
1391int jbd2_journal_check_available_features (journal_t *journal, unsigned long compat, 1398int jbd2_journal_check_available_features (journal_t *journal, unsigned long compat,
1392 unsigned long ro, unsigned long incompat) 1399 unsigned long ro, unsigned long incompat)
1393{ 1400{
1394 journal_superblock_t *sb;
1395
1396 if (!compat && !ro && !incompat) 1401 if (!compat && !ro && !incompat)
1397 return 1; 1402 return 1;
1398 1403
1399 sb = journal->j_superblock;
1400
1401 /* We can support any known requested features iff the 1404 /* We can support any known requested features iff the
1402 * superblock is in version 2. Otherwise we fail to support any 1405 * superblock is in version 2. Otherwise we fail to support any
1403 * extended sb features. */ 1406 * extended sb features. */
@@ -1545,7 +1548,7 @@ int jbd2_journal_flush(journal_t *journal)
1545 transaction_t *transaction = NULL; 1548 transaction_t *transaction = NULL;
1546 unsigned long old_tail; 1549 unsigned long old_tail;
1547 1550
1548 spin_lock(&journal->j_state_lock); 1551 write_lock(&journal->j_state_lock);
1549 1552
1550 /* Force everything buffered to the log... */ 1553 /* Force everything buffered to the log... */
1551 if (journal->j_running_transaction) { 1554 if (journal->j_running_transaction) {
@@ -1558,10 +1561,10 @@ int jbd2_journal_flush(journal_t *journal)
1558 if (transaction) { 1561 if (transaction) {
1559 tid_t tid = transaction->t_tid; 1562 tid_t tid = transaction->t_tid;
1560 1563
1561 spin_unlock(&journal->j_state_lock); 1564 write_unlock(&journal->j_state_lock);
1562 jbd2_log_wait_commit(journal, tid); 1565 jbd2_log_wait_commit(journal, tid);
1563 } else { 1566 } else {
1564 spin_unlock(&journal->j_state_lock); 1567 write_unlock(&journal->j_state_lock);
1565 } 1568 }
1566 1569
1567 /* ...and flush everything in the log out to disk. */ 1570 /* ...and flush everything in the log out to disk. */
@@ -1585,12 +1588,12 @@ int jbd2_journal_flush(journal_t *journal)
1585 * the magic code for a fully-recovered superblock. Any future 1588 * the magic code for a fully-recovered superblock. Any future
1586 * commits of data to the journal will restore the current 1589 * commits of data to the journal will restore the current
1587 * s_start value. */ 1590 * s_start value. */
1588 spin_lock(&journal->j_state_lock); 1591 write_lock(&journal->j_state_lock);
1589 old_tail = journal->j_tail; 1592 old_tail = journal->j_tail;
1590 journal->j_tail = 0; 1593 journal->j_tail = 0;
1591 spin_unlock(&journal->j_state_lock); 1594 write_unlock(&journal->j_state_lock);
1592 jbd2_journal_update_superblock(journal, 1); 1595 jbd2_journal_update_superblock(journal, 1);
1593 spin_lock(&journal->j_state_lock); 1596 write_lock(&journal->j_state_lock);
1594 journal->j_tail = old_tail; 1597 journal->j_tail = old_tail;
1595 1598
1596 J_ASSERT(!journal->j_running_transaction); 1599 J_ASSERT(!journal->j_running_transaction);
@@ -1598,7 +1601,7 @@ int jbd2_journal_flush(journal_t *journal)
1598 J_ASSERT(!journal->j_checkpoint_transactions); 1601 J_ASSERT(!journal->j_checkpoint_transactions);
1599 J_ASSERT(journal->j_head == journal->j_tail); 1602 J_ASSERT(journal->j_head == journal->j_tail);
1600 J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); 1603 J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
1601 spin_unlock(&journal->j_state_lock); 1604 write_unlock(&journal->j_state_lock);
1602 return 0; 1605 return 0;
1603} 1606}
1604 1607
@@ -1617,7 +1620,6 @@ int jbd2_journal_flush(journal_t *journal)
1617 1620
1618int jbd2_journal_wipe(journal_t *journal, int write) 1621int jbd2_journal_wipe(journal_t *journal, int write)
1619{ 1622{
1620 journal_superblock_t *sb;
1621 int err = 0; 1623 int err = 0;
1622 1624
1623 J_ASSERT (!(journal->j_flags & JBD2_LOADED)); 1625 J_ASSERT (!(journal->j_flags & JBD2_LOADED));
@@ -1626,8 +1628,6 @@ int jbd2_journal_wipe(journal_t *journal, int write)
1626 if (err) 1628 if (err)
1627 return err; 1629 return err;
1628 1630
1629 sb = journal->j_superblock;
1630
1631 if (!journal->j_tail) 1631 if (!journal->j_tail)
1632 goto no_recovery; 1632 goto no_recovery;
1633 1633
@@ -1665,12 +1665,12 @@ void __jbd2_journal_abort_hard(journal_t *journal)
1665 printk(KERN_ERR "Aborting journal on device %s.\n", 1665 printk(KERN_ERR "Aborting journal on device %s.\n",
1666 journal->j_devname); 1666 journal->j_devname);
1667 1667
1668 spin_lock(&journal->j_state_lock); 1668 write_lock(&journal->j_state_lock);
1669 journal->j_flags |= JBD2_ABORT; 1669 journal->j_flags |= JBD2_ABORT;
1670 transaction = journal->j_running_transaction; 1670 transaction = journal->j_running_transaction;
1671 if (transaction) 1671 if (transaction)
1672 __jbd2_log_start_commit(journal, transaction->t_tid); 1672 __jbd2_log_start_commit(journal, transaction->t_tid);
1673 spin_unlock(&journal->j_state_lock); 1673 write_unlock(&journal->j_state_lock);
1674} 1674}
1675 1675
1676/* Soft abort: record the abort error status in the journal superblock, 1676/* Soft abort: record the abort error status in the journal superblock,
@@ -1755,12 +1755,12 @@ int jbd2_journal_errno(journal_t *journal)
1755{ 1755{
1756 int err; 1756 int err;
1757 1757
1758 spin_lock(&journal->j_state_lock); 1758 read_lock(&journal->j_state_lock);
1759 if (journal->j_flags & JBD2_ABORT) 1759 if (journal->j_flags & JBD2_ABORT)
1760 err = -EROFS; 1760 err = -EROFS;
1761 else 1761 else
1762 err = journal->j_errno; 1762 err = journal->j_errno;
1763 spin_unlock(&journal->j_state_lock); 1763 read_unlock(&journal->j_state_lock);
1764 return err; 1764 return err;
1765} 1765}
1766 1766
@@ -1775,12 +1775,12 @@ int jbd2_journal_clear_err(journal_t *journal)
1775{ 1775{
1776 int err = 0; 1776 int err = 0;
1777 1777
1778 spin_lock(&journal->j_state_lock); 1778 write_lock(&journal->j_state_lock);
1779 if (journal->j_flags & JBD2_ABORT) 1779 if (journal->j_flags & JBD2_ABORT)
1780 err = -EROFS; 1780 err = -EROFS;
1781 else 1781 else
1782 journal->j_errno = 0; 1782 journal->j_errno = 0;
1783 spin_unlock(&journal->j_state_lock); 1783 write_unlock(&journal->j_state_lock);
1784 return err; 1784 return err;
1785} 1785}
1786 1786
@@ -1793,10 +1793,10 @@ int jbd2_journal_clear_err(journal_t *journal)
1793 */ 1793 */
1794void jbd2_journal_ack_err(journal_t *journal) 1794void jbd2_journal_ack_err(journal_t *journal)
1795{ 1795{
1796 spin_lock(&journal->j_state_lock); 1796 write_lock(&journal->j_state_lock);
1797 if (journal->j_errno) 1797 if (journal->j_errno)
1798 journal->j_flags |= JBD2_ACK_ERR; 1798 journal->j_flags |= JBD2_ACK_ERR;
1799 spin_unlock(&journal->j_state_lock); 1799 write_unlock(&journal->j_state_lock);
1800} 1800}
1801 1801
1802int jbd2_journal_blocks_per_page(struct inode *inode) 1802int jbd2_journal_blocks_per_page(struct inode *inode)
@@ -2201,8 +2201,6 @@ void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode)
2201void jbd2_journal_release_jbd_inode(journal_t *journal, 2201void jbd2_journal_release_jbd_inode(journal_t *journal,
2202 struct jbd2_inode *jinode) 2202 struct jbd2_inode *jinode)
2203{ 2203{
2204 int writeout = 0;
2205
2206 if (!journal) 2204 if (!journal)
2207 return; 2205 return;
2208restart: 2206restart:
@@ -2219,9 +2217,6 @@ restart:
2219 goto restart; 2217 goto restart;
2220 } 2218 }
2221 2219
2222 /* Do we need to wait for data writeback? */
2223 if (journal->j_committing_transaction == jinode->i_transaction)
2224 writeout = 1;
2225 if (jinode->i_transaction) { 2220 if (jinode->i_transaction) {
2226 list_del(&jinode->i_list); 2221 list_del(&jinode->i_list);
2227 jinode->i_transaction = NULL; 2222 jinode->i_transaction = NULL;
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 049281b7cb89..2bc4d5f116f1 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -285,12 +285,10 @@ int jbd2_journal_recover(journal_t *journal)
285int jbd2_journal_skip_recovery(journal_t *journal) 285int jbd2_journal_skip_recovery(journal_t *journal)
286{ 286{
287 int err; 287 int err;
288 journal_superblock_t * sb;
289 288
290 struct recovery_info info; 289 struct recovery_info info;
291 290
292 memset (&info, 0, sizeof(info)); 291 memset (&info, 0, sizeof(info));
293 sb = journal->j_superblock;
294 292
295 err = do_one_pass(journal, &info, PASS_SCAN); 293 err = do_one_pass(journal, &info, PASS_SCAN);
296 294
@@ -299,7 +297,8 @@ int jbd2_journal_skip_recovery(journal_t *journal)
299 ++journal->j_transaction_sequence; 297 ++journal->j_transaction_sequence;
300 } else { 298 } else {
301#ifdef CONFIG_JBD2_DEBUG 299#ifdef CONFIG_JBD2_DEBUG
302 int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence); 300 int dropped = info.end_transaction -
301 be32_to_cpu(journal->j_superblock->s_sequence);
303#endif 302#endif
304 jbd_debug(1, 303 jbd_debug(1,
305 "JBD: ignoring %d transaction%s from the journal.\n", 304 "JBD: ignoring %d transaction%s from the journal.\n",
@@ -365,11 +364,6 @@ static int do_one_pass(journal_t *journal,
365 int tag_bytes = journal_tag_bytes(journal); 364 int tag_bytes = journal_tag_bytes(journal);
366 __u32 crc32_sum = ~0; /* Transactional Checksums */ 365 __u32 crc32_sum = ~0; /* Transactional Checksums */
367 366
368 /* Precompute the maximum metadata descriptors in a descriptor block */
369 int MAX_BLOCKS_PER_DESC;
370 MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t))
371 / tag_bytes);
372
373 /* 367 /*
374 * First thing is to establish what we expect to find in the log 368 * First thing is to establish what we expect to find in the log
375 * (in terms of transaction IDs), and where (in terms of log 369 * (in terms of transaction IDs), and where (in terms of log
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index b8e0806681bb..d95cc9d0401d 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -26,6 +26,8 @@
26#include <linux/mm.h> 26#include <linux/mm.h>
27#include <linux/highmem.h> 27#include <linux/highmem.h>
28#include <linux/hrtimer.h> 28#include <linux/hrtimer.h>
29#include <linux/backing-dev.h>
30#include <linux/module.h>
29 31
30static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); 32static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
31 33
@@ -53,6 +55,9 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
53 transaction->t_tid = journal->j_transaction_sequence++; 55 transaction->t_tid = journal->j_transaction_sequence++;
54 transaction->t_expires = jiffies + journal->j_commit_interval; 56 transaction->t_expires = jiffies + journal->j_commit_interval;
55 spin_lock_init(&transaction->t_handle_lock); 57 spin_lock_init(&transaction->t_handle_lock);
58 atomic_set(&transaction->t_updates, 0);
59 atomic_set(&transaction->t_outstanding_credits, 0);
60 atomic_set(&transaction->t_handle_count, 0);
56 INIT_LIST_HEAD(&transaction->t_inode_list); 61 INIT_LIST_HEAD(&transaction->t_inode_list);
57 INIT_LIST_HEAD(&transaction->t_private_list); 62 INIT_LIST_HEAD(&transaction->t_private_list);
58 63
@@ -83,65 +88,75 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
83 * transaction's buffer credits. 88 * transaction's buffer credits.
84 */ 89 */
85 90
86static int start_this_handle(journal_t *journal, handle_t *handle) 91static int start_this_handle(journal_t *journal, handle_t *handle,
92 int gfp_mask)
87{ 93{
88 transaction_t *transaction; 94 transaction_t *transaction;
89 int needed; 95 int needed;
90 int nblocks = handle->h_buffer_credits; 96 int nblocks = handle->h_buffer_credits;
91 transaction_t *new_transaction = NULL; 97 transaction_t *new_transaction = NULL;
92 int ret = 0;
93 unsigned long ts = jiffies; 98 unsigned long ts = jiffies;
94 99
95 if (nblocks > journal->j_max_transaction_buffers) { 100 if (nblocks > journal->j_max_transaction_buffers) {
96 printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", 101 printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
97 current->comm, nblocks, 102 current->comm, nblocks,
98 journal->j_max_transaction_buffers); 103 journal->j_max_transaction_buffers);
99 ret = -ENOSPC; 104 return -ENOSPC;
100 goto out;
101 } 105 }
102 106
103alloc_transaction: 107alloc_transaction:
104 if (!journal->j_running_transaction) { 108 if (!journal->j_running_transaction) {
105 new_transaction = kzalloc(sizeof(*new_transaction), 109 new_transaction = kzalloc(sizeof(*new_transaction), gfp_mask);
106 GFP_NOFS|__GFP_NOFAIL);
107 if (!new_transaction) { 110 if (!new_transaction) {
108 ret = -ENOMEM; 111 /*
109 goto out; 112 * If __GFP_FS is not present, then we may be
113 * being called from inside the fs writeback
114 * layer, so we MUST NOT fail. Since
115 * __GFP_NOFAIL is going away, we will arrange
116 * to retry the allocation ourselves.
117 */
118 if ((gfp_mask & __GFP_FS) == 0) {
119 congestion_wait(BLK_RW_ASYNC, HZ/50);
120 goto alloc_transaction;
121 }
122 return -ENOMEM;
110 } 123 }
111 } 124 }
112 125
113 jbd_debug(3, "New handle %p going live.\n", handle); 126 jbd_debug(3, "New handle %p going live.\n", handle);
114 127
115repeat:
116
117 /* 128 /*
118 * We need to hold j_state_lock until t_updates has been incremented, 129 * We need to hold j_state_lock until t_updates has been incremented,
119 * for proper journal barrier handling 130 * for proper journal barrier handling
120 */ 131 */
121 spin_lock(&journal->j_state_lock); 132repeat:
122repeat_locked: 133 read_lock(&journal->j_state_lock);
123 if (is_journal_aborted(journal) || 134 if (is_journal_aborted(journal) ||
124 (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) { 135 (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
125 spin_unlock(&journal->j_state_lock); 136 read_unlock(&journal->j_state_lock);
126 ret = -EROFS; 137 kfree(new_transaction);
127 goto out; 138 return -EROFS;
128 } 139 }
129 140
130 /* Wait on the journal's transaction barrier if necessary */ 141 /* Wait on the journal's transaction barrier if necessary */
131 if (journal->j_barrier_count) { 142 if (journal->j_barrier_count) {
132 spin_unlock(&journal->j_state_lock); 143 read_unlock(&journal->j_state_lock);
133 wait_event(journal->j_wait_transaction_locked, 144 wait_event(journal->j_wait_transaction_locked,
134 journal->j_barrier_count == 0); 145 journal->j_barrier_count == 0);
135 goto repeat; 146 goto repeat;
136 } 147 }
137 148
138 if (!journal->j_running_transaction) { 149 if (!journal->j_running_transaction) {
139 if (!new_transaction) { 150 read_unlock(&journal->j_state_lock);
140 spin_unlock(&journal->j_state_lock); 151 if (!new_transaction)
141 goto alloc_transaction; 152 goto alloc_transaction;
153 write_lock(&journal->j_state_lock);
154 if (!journal->j_running_transaction) {
155 jbd2_get_transaction(journal, new_transaction);
156 new_transaction = NULL;
142 } 157 }
143 jbd2_get_transaction(journal, new_transaction); 158 write_unlock(&journal->j_state_lock);
144 new_transaction = NULL; 159 goto repeat;
145 } 160 }
146 161
147 transaction = journal->j_running_transaction; 162 transaction = journal->j_running_transaction;
@@ -155,7 +170,7 @@ repeat_locked:
155 170
156 prepare_to_wait(&journal->j_wait_transaction_locked, 171 prepare_to_wait(&journal->j_wait_transaction_locked,
157 &wait, TASK_UNINTERRUPTIBLE); 172 &wait, TASK_UNINTERRUPTIBLE);
158 spin_unlock(&journal->j_state_lock); 173 read_unlock(&journal->j_state_lock);
159 schedule(); 174 schedule();
160 finish_wait(&journal->j_wait_transaction_locked, &wait); 175 finish_wait(&journal->j_wait_transaction_locked, &wait);
161 goto repeat; 176 goto repeat;
@@ -166,8 +181,8 @@ repeat_locked:
166 * buffers requested by this operation, we need to stall pending a log 181 * buffers requested by this operation, we need to stall pending a log
167 * checkpoint to free some more log space. 182 * checkpoint to free some more log space.
168 */ 183 */
169 spin_lock(&transaction->t_handle_lock); 184 needed = atomic_add_return(nblocks,
170 needed = transaction->t_outstanding_credits + nblocks; 185 &transaction->t_outstanding_credits);
171 186
172 if (needed > journal->j_max_transaction_buffers) { 187 if (needed > journal->j_max_transaction_buffers) {
173 /* 188 /*
@@ -178,11 +193,11 @@ repeat_locked:
178 DEFINE_WAIT(wait); 193 DEFINE_WAIT(wait);
179 194
180 jbd_debug(2, "Handle %p starting new commit...\n", handle); 195 jbd_debug(2, "Handle %p starting new commit...\n", handle);
181 spin_unlock(&transaction->t_handle_lock); 196 atomic_sub(nblocks, &transaction->t_outstanding_credits);
182 prepare_to_wait(&journal->j_wait_transaction_locked, &wait, 197 prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
183 TASK_UNINTERRUPTIBLE); 198 TASK_UNINTERRUPTIBLE);
184 __jbd2_log_start_commit(journal, transaction->t_tid); 199 __jbd2_log_start_commit(journal, transaction->t_tid);
185 spin_unlock(&journal->j_state_lock); 200 read_unlock(&journal->j_state_lock);
186 schedule(); 201 schedule();
187 finish_wait(&journal->j_wait_transaction_locked, &wait); 202 finish_wait(&journal->j_wait_transaction_locked, &wait);
188 goto repeat; 203 goto repeat;
@@ -215,35 +230,48 @@ repeat_locked:
215 */ 230 */
216 if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) { 231 if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) {
217 jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle); 232 jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle);
218 spin_unlock(&transaction->t_handle_lock); 233 atomic_sub(nblocks, &transaction->t_outstanding_credits);
219 __jbd2_log_wait_for_space(journal); 234 read_unlock(&journal->j_state_lock);
220 goto repeat_locked; 235 write_lock(&journal->j_state_lock);
236 if (__jbd2_log_space_left(journal) < jbd_space_needed(journal))
237 __jbd2_log_wait_for_space(journal);
238 write_unlock(&journal->j_state_lock);
239 goto repeat;
221 } 240 }
222 241
223 /* OK, account for the buffers that this operation expects to 242 /* OK, account for the buffers that this operation expects to
224 * use and add the handle to the running transaction. */ 243 * use and add the handle to the running transaction.
225 244 *
226 if (time_after(transaction->t_start, ts)) { 245 * In order for t_max_wait to be reliable, it must be
246 * protected by a lock. But doing so will mean that
247 * start_this_handle() can not be run in parallel on SMP
248 * systems, which limits our scalability. So we only enable
249 * it when debugging is enabled. We may want to use a
250 * separate flag, eventually, so we can enable this
251 * independently of debugging.
252 */
253#ifdef CONFIG_JBD2_DEBUG
254 if (jbd2_journal_enable_debug &&
255 time_after(transaction->t_start, ts)) {
227 ts = jbd2_time_diff(ts, transaction->t_start); 256 ts = jbd2_time_diff(ts, transaction->t_start);
257 spin_lock(&transaction->t_handle_lock);
228 if (ts > transaction->t_max_wait) 258 if (ts > transaction->t_max_wait)
229 transaction->t_max_wait = ts; 259 transaction->t_max_wait = ts;
260 spin_unlock(&transaction->t_handle_lock);
230 } 261 }
231 262#endif
232 handle->h_transaction = transaction; 263 handle->h_transaction = transaction;
233 transaction->t_outstanding_credits += nblocks; 264 atomic_inc(&transaction->t_updates);
234 transaction->t_updates++; 265 atomic_inc(&transaction->t_handle_count);
235 transaction->t_handle_count++;
236 jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n", 266 jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n",
237 handle, nblocks, transaction->t_outstanding_credits, 267 handle, nblocks,
268 atomic_read(&transaction->t_outstanding_credits),
238 __jbd2_log_space_left(journal)); 269 __jbd2_log_space_left(journal));
239 spin_unlock(&transaction->t_handle_lock); 270 read_unlock(&journal->j_state_lock);
240 spin_unlock(&journal->j_state_lock);
241 271
242 lock_map_acquire(&handle->h_lockdep_map); 272 lock_map_acquire(&handle->h_lockdep_map);
243out: 273 kfree(new_transaction);
244 if (unlikely(new_transaction)) /* It's usually NULL */ 274 return 0;
245 kfree(new_transaction);
246 return ret;
247} 275}
248 276
249static struct lock_class_key jbd2_handle_key; 277static struct lock_class_key jbd2_handle_key;
@@ -278,7 +306,7 @@ static handle_t *new_handle(int nblocks)
278 * 306 *
279 * Return a pointer to a newly allocated handle, or NULL on failure 307 * Return a pointer to a newly allocated handle, or NULL on failure
280 */ 308 */
281handle_t *jbd2_journal_start(journal_t *journal, int nblocks) 309handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask)
282{ 310{
283 handle_t *handle = journal_current_handle(); 311 handle_t *handle = journal_current_handle();
284 int err; 312 int err;
@@ -298,7 +326,7 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
298 326
299 current->journal_info = handle; 327 current->journal_info = handle;
300 328
301 err = start_this_handle(journal, handle); 329 err = start_this_handle(journal, handle, gfp_mask);
302 if (err < 0) { 330 if (err < 0) {
303 jbd2_free_handle(handle); 331 jbd2_free_handle(handle);
304 current->journal_info = NULL; 332 current->journal_info = NULL;
@@ -308,6 +336,15 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
308out: 336out:
309 return handle; 337 return handle;
310} 338}
339EXPORT_SYMBOL(jbd2__journal_start);
340
341
342handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
343{
344 return jbd2__journal_start(journal, nblocks, GFP_NOFS);
345}
346EXPORT_SYMBOL(jbd2_journal_start);
347
311 348
312/** 349/**
313 * int jbd2_journal_extend() - extend buffer credits. 350 * int jbd2_journal_extend() - extend buffer credits.
@@ -342,7 +379,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
342 379
343 result = 1; 380 result = 1;
344 381
345 spin_lock(&journal->j_state_lock); 382 read_lock(&journal->j_state_lock);
346 383
347 /* Don't extend a locked-down transaction! */ 384 /* Don't extend a locked-down transaction! */
348 if (handle->h_transaction->t_state != T_RUNNING) { 385 if (handle->h_transaction->t_state != T_RUNNING) {
@@ -352,7 +389,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
352 } 389 }
353 390
354 spin_lock(&transaction->t_handle_lock); 391 spin_lock(&transaction->t_handle_lock);
355 wanted = transaction->t_outstanding_credits + nblocks; 392 wanted = atomic_read(&transaction->t_outstanding_credits) + nblocks;
356 393
357 if (wanted > journal->j_max_transaction_buffers) { 394 if (wanted > journal->j_max_transaction_buffers) {
358 jbd_debug(3, "denied handle %p %d blocks: " 395 jbd_debug(3, "denied handle %p %d blocks: "
@@ -367,14 +404,14 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
367 } 404 }
368 405
369 handle->h_buffer_credits += nblocks; 406 handle->h_buffer_credits += nblocks;
370 transaction->t_outstanding_credits += nblocks; 407 atomic_add(nblocks, &transaction->t_outstanding_credits);
371 result = 0; 408 result = 0;
372 409
373 jbd_debug(3, "extended handle %p by %d\n", handle, nblocks); 410 jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
374unlock: 411unlock:
375 spin_unlock(&transaction->t_handle_lock); 412 spin_unlock(&transaction->t_handle_lock);
376error_out: 413error_out:
377 spin_unlock(&journal->j_state_lock); 414 read_unlock(&journal->j_state_lock);
378out: 415out:
379 return result; 416 return result;
380} 417}
@@ -394,8 +431,7 @@ out:
394 * transaction capabable of guaranteeing the requested number of 431 * transaction capabable of guaranteeing the requested number of
395 * credits. 432 * credits.
396 */ 433 */
397 434int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
398int jbd2_journal_restart(handle_t *handle, int nblocks)
399{ 435{
400 transaction_t *transaction = handle->h_transaction; 436 transaction_t *transaction = handle->h_transaction;
401 journal_t *journal = transaction->t_journal; 437 journal_t *journal = transaction->t_journal;
@@ -410,29 +446,35 @@ int jbd2_journal_restart(handle_t *handle, int nblocks)
410 * First unlink the handle from its current transaction, and start the 446 * First unlink the handle from its current transaction, and start the
411 * commit on that. 447 * commit on that.
412 */ 448 */
413 J_ASSERT(transaction->t_updates > 0); 449 J_ASSERT(atomic_read(&transaction->t_updates) > 0);
414 J_ASSERT(journal_current_handle() == handle); 450 J_ASSERT(journal_current_handle() == handle);
415 451
416 spin_lock(&journal->j_state_lock); 452 read_lock(&journal->j_state_lock);
417 spin_lock(&transaction->t_handle_lock); 453 spin_lock(&transaction->t_handle_lock);
418 transaction->t_outstanding_credits -= handle->h_buffer_credits; 454 atomic_sub(handle->h_buffer_credits,
419 transaction->t_updates--; 455 &transaction->t_outstanding_credits);
420 456 if (atomic_dec_and_test(&transaction->t_updates))
421 if (!transaction->t_updates)
422 wake_up(&journal->j_wait_updates); 457 wake_up(&journal->j_wait_updates);
423 spin_unlock(&transaction->t_handle_lock); 458 spin_unlock(&transaction->t_handle_lock);
424 459
425 jbd_debug(2, "restarting handle %p\n", handle); 460 jbd_debug(2, "restarting handle %p\n", handle);
426 __jbd2_log_start_commit(journal, transaction->t_tid); 461 __jbd2_log_start_commit(journal, transaction->t_tid);
427 spin_unlock(&journal->j_state_lock); 462 read_unlock(&journal->j_state_lock);
428 463
429 lock_map_release(&handle->h_lockdep_map); 464 lock_map_release(&handle->h_lockdep_map);
430 handle->h_buffer_credits = nblocks; 465 handle->h_buffer_credits = nblocks;
431 ret = start_this_handle(journal, handle); 466 ret = start_this_handle(journal, handle, gfp_mask);
432 return ret; 467 return ret;
433} 468}
469EXPORT_SYMBOL(jbd2__journal_restart);
434 470
435 471
472int jbd2_journal_restart(handle_t *handle, int nblocks)
473{
474 return jbd2__journal_restart(handle, nblocks, GFP_NOFS);
475}
476EXPORT_SYMBOL(jbd2_journal_restart);
477
436/** 478/**
437 * void jbd2_journal_lock_updates () - establish a transaction barrier. 479 * void jbd2_journal_lock_updates () - establish a transaction barrier.
438 * @journal: Journal to establish a barrier on. 480 * @journal: Journal to establish a barrier on.
@@ -447,7 +489,7 @@ void jbd2_journal_lock_updates(journal_t *journal)
447{ 489{
448 DEFINE_WAIT(wait); 490 DEFINE_WAIT(wait);
449 491
450 spin_lock(&journal->j_state_lock); 492 write_lock(&journal->j_state_lock);
451 ++journal->j_barrier_count; 493 ++journal->j_barrier_count;
452 494
453 /* Wait until there are no running updates */ 495 /* Wait until there are no running updates */
@@ -458,19 +500,19 @@ void jbd2_journal_lock_updates(journal_t *journal)
458 break; 500 break;
459 501
460 spin_lock(&transaction->t_handle_lock); 502 spin_lock(&transaction->t_handle_lock);
461 if (!transaction->t_updates) { 503 if (!atomic_read(&transaction->t_updates)) {
462 spin_unlock(&transaction->t_handle_lock); 504 spin_unlock(&transaction->t_handle_lock);
463 break; 505 break;
464 } 506 }
465 prepare_to_wait(&journal->j_wait_updates, &wait, 507 prepare_to_wait(&journal->j_wait_updates, &wait,
466 TASK_UNINTERRUPTIBLE); 508 TASK_UNINTERRUPTIBLE);
467 spin_unlock(&transaction->t_handle_lock); 509 spin_unlock(&transaction->t_handle_lock);
468 spin_unlock(&journal->j_state_lock); 510 write_unlock(&journal->j_state_lock);
469 schedule(); 511 schedule();
470 finish_wait(&journal->j_wait_updates, &wait); 512 finish_wait(&journal->j_wait_updates, &wait);
471 spin_lock(&journal->j_state_lock); 513 write_lock(&journal->j_state_lock);
472 } 514 }
473 spin_unlock(&journal->j_state_lock); 515 write_unlock(&journal->j_state_lock);
474 516
475 /* 517 /*
476 * We have now established a barrier against other normal updates, but 518 * We have now established a barrier against other normal updates, but
@@ -494,9 +536,9 @@ void jbd2_journal_unlock_updates (journal_t *journal)
494 J_ASSERT(journal->j_barrier_count != 0); 536 J_ASSERT(journal->j_barrier_count != 0);
495 537
496 mutex_unlock(&journal->j_barrier); 538 mutex_unlock(&journal->j_barrier);
497 spin_lock(&journal->j_state_lock); 539 write_lock(&journal->j_state_lock);
498 --journal->j_barrier_count; 540 --journal->j_barrier_count;
499 spin_unlock(&journal->j_state_lock); 541 write_unlock(&journal->j_state_lock);
500 wake_up(&journal->j_wait_transaction_locked); 542 wake_up(&journal->j_wait_transaction_locked);
501} 543}
502 544
@@ -1238,7 +1280,8 @@ int jbd2_journal_stop(handle_t *handle)
1238{ 1280{
1239 transaction_t *transaction = handle->h_transaction; 1281 transaction_t *transaction = handle->h_transaction;
1240 journal_t *journal = transaction->t_journal; 1282 journal_t *journal = transaction->t_journal;
1241 int err; 1283 int err, wait_for_commit = 0;
1284 tid_t tid;
1242 pid_t pid; 1285 pid_t pid;
1243 1286
1244 J_ASSERT(journal_current_handle() == handle); 1287 J_ASSERT(journal_current_handle() == handle);
@@ -1246,7 +1289,7 @@ int jbd2_journal_stop(handle_t *handle)
1246 if (is_handle_aborted(handle)) 1289 if (is_handle_aborted(handle))
1247 err = -EIO; 1290 err = -EIO;
1248 else { 1291 else {
1249 J_ASSERT(transaction->t_updates > 0); 1292 J_ASSERT(atomic_read(&transaction->t_updates) > 0);
1250 err = 0; 1293 err = 0;
1251 } 1294 }
1252 1295
@@ -1291,9 +1334,9 @@ int jbd2_journal_stop(handle_t *handle)
1291 1334
1292 journal->j_last_sync_writer = pid; 1335 journal->j_last_sync_writer = pid;
1293 1336
1294 spin_lock(&journal->j_state_lock); 1337 read_lock(&journal->j_state_lock);
1295 commit_time = journal->j_average_commit_time; 1338 commit_time = journal->j_average_commit_time;
1296 spin_unlock(&journal->j_state_lock); 1339 read_unlock(&journal->j_state_lock);
1297 1340
1298 trans_time = ktime_to_ns(ktime_sub(ktime_get(), 1341 trans_time = ktime_to_ns(ktime_sub(ktime_get(),
1299 transaction->t_start_time)); 1342 transaction->t_start_time));
@@ -1314,14 +1357,8 @@ int jbd2_journal_stop(handle_t *handle)
1314 if (handle->h_sync) 1357 if (handle->h_sync)
1315 transaction->t_synchronous_commit = 1; 1358 transaction->t_synchronous_commit = 1;
1316 current->journal_info = NULL; 1359 current->journal_info = NULL;
1317 spin_lock(&transaction->t_handle_lock); 1360 atomic_sub(handle->h_buffer_credits,
1318 transaction->t_outstanding_credits -= handle->h_buffer_credits; 1361 &transaction->t_outstanding_credits);
1319 transaction->t_updates--;
1320 if (!transaction->t_updates) {
1321 wake_up(&journal->j_wait_updates);
1322 if (journal->j_barrier_count)
1323 wake_up(&journal->j_wait_transaction_locked);
1324 }
1325 1362
1326 /* 1363 /*
1327 * If the handle is marked SYNC, we need to set another commit 1364 * If the handle is marked SYNC, we need to set another commit
@@ -1330,15 +1367,13 @@ int jbd2_journal_stop(handle_t *handle)
1330 * transaction is too old now. 1367 * transaction is too old now.
1331 */ 1368 */
1332 if (handle->h_sync || 1369 if (handle->h_sync ||
1333 transaction->t_outstanding_credits > 1370 (atomic_read(&transaction->t_outstanding_credits) >
1334 journal->j_max_transaction_buffers || 1371 journal->j_max_transaction_buffers) ||
1335 time_after_eq(jiffies, transaction->t_expires)) { 1372 time_after_eq(jiffies, transaction->t_expires)) {
1336 /* Do this even for aborted journals: an abort still 1373 /* Do this even for aborted journals: an abort still
1337 * completes the commit thread, it just doesn't write 1374 * completes the commit thread, it just doesn't write
1338 * anything to disk. */ 1375 * anything to disk. */
1339 tid_t tid = transaction->t_tid;
1340 1376
1341 spin_unlock(&transaction->t_handle_lock);
1342 jbd_debug(2, "transaction too old, requesting commit for " 1377 jbd_debug(2, "transaction too old, requesting commit for "
1343 "handle %p\n", handle); 1378 "handle %p\n", handle);
1344 /* This is non-blocking */ 1379 /* This is non-blocking */
@@ -1349,11 +1384,25 @@ int jbd2_journal_stop(handle_t *handle)
1349 * to wait for the commit to complete. 1384 * to wait for the commit to complete.
1350 */ 1385 */
1351 if (handle->h_sync && !(current->flags & PF_MEMALLOC)) 1386 if (handle->h_sync && !(current->flags & PF_MEMALLOC))
1352 err = jbd2_log_wait_commit(journal, tid); 1387 wait_for_commit = 1;
1353 } else {
1354 spin_unlock(&transaction->t_handle_lock);
1355 } 1388 }
1356 1389
1390 /*
1391 * Once we drop t_updates, if it goes to zero the transaction
1392 * could start commiting on us and eventually disappear. So
1393 * once we do this, we must not dereference transaction
1394 * pointer again.
1395 */
1396 tid = transaction->t_tid;
1397 if (atomic_dec_and_test(&transaction->t_updates)) {
1398 wake_up(&journal->j_wait_updates);
1399 if (journal->j_barrier_count)
1400 wake_up(&journal->j_wait_transaction_locked);
1401 }
1402
1403 if (wait_for_commit)
1404 err = jbd2_log_wait_commit(journal, tid);
1405
1357 lock_map_release(&handle->h_lockdep_map); 1406 lock_map_release(&handle->h_lockdep_map);
1358 1407
1359 jbd2_free_handle(handle); 1408 jbd2_free_handle(handle);
@@ -1719,7 +1768,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1719 goto zap_buffer_unlocked; 1768 goto zap_buffer_unlocked;
1720 1769
1721 /* OK, we have data buffer in journaled mode */ 1770 /* OK, we have data buffer in journaled mode */
1722 spin_lock(&journal->j_state_lock); 1771 write_lock(&journal->j_state_lock);
1723 jbd_lock_bh_state(bh); 1772 jbd_lock_bh_state(bh);
1724 spin_lock(&journal->j_list_lock); 1773 spin_lock(&journal->j_list_lock);
1725 1774
@@ -1772,7 +1821,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1772 jbd2_journal_put_journal_head(jh); 1821 jbd2_journal_put_journal_head(jh);
1773 spin_unlock(&journal->j_list_lock); 1822 spin_unlock(&journal->j_list_lock);
1774 jbd_unlock_bh_state(bh); 1823 jbd_unlock_bh_state(bh);
1775 spin_unlock(&journal->j_state_lock); 1824 write_unlock(&journal->j_state_lock);
1776 return ret; 1825 return ret;
1777 } else { 1826 } else {
1778 /* There is no currently-running transaction. So the 1827 /* There is no currently-running transaction. So the
@@ -1786,7 +1835,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1786 jbd2_journal_put_journal_head(jh); 1835 jbd2_journal_put_journal_head(jh);
1787 spin_unlock(&journal->j_list_lock); 1836 spin_unlock(&journal->j_list_lock);
1788 jbd_unlock_bh_state(bh); 1837 jbd_unlock_bh_state(bh);
1789 spin_unlock(&journal->j_state_lock); 1838 write_unlock(&journal->j_state_lock);
1790 return ret; 1839 return ret;
1791 } else { 1840 } else {
1792 /* The orphan record's transaction has 1841 /* The orphan record's transaction has
@@ -1810,7 +1859,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1810 jbd2_journal_put_journal_head(jh); 1859 jbd2_journal_put_journal_head(jh);
1811 spin_unlock(&journal->j_list_lock); 1860 spin_unlock(&journal->j_list_lock);
1812 jbd_unlock_bh_state(bh); 1861 jbd_unlock_bh_state(bh);
1813 spin_unlock(&journal->j_state_lock); 1862 write_unlock(&journal->j_state_lock);
1814 return 0; 1863 return 0;
1815 } else { 1864 } else {
1816 /* Good, the buffer belongs to the running transaction. 1865 /* Good, the buffer belongs to the running transaction.
@@ -1829,7 +1878,7 @@ zap_buffer:
1829zap_buffer_no_jh: 1878zap_buffer_no_jh:
1830 spin_unlock(&journal->j_list_lock); 1879 spin_unlock(&journal->j_list_lock);
1831 jbd_unlock_bh_state(bh); 1880 jbd_unlock_bh_state(bh);
1832 spin_unlock(&journal->j_state_lock); 1881 write_unlock(&journal->j_state_lock);
1833zap_buffer_unlocked: 1882zap_buffer_unlocked:
1834 clear_buffer_dirty(bh); 1883 clear_buffer_dirty(bh);
1835 J_ASSERT_BH(bh, !buffer_jbddirty(bh)); 1884 J_ASSERT_BH(bh, !buffer_jbddirty(bh));
@@ -2136,9 +2185,9 @@ int jbd2_journal_begin_ordered_truncate(journal_t *journal,
2136 /* Locks are here just to force reading of recent values, it is 2185 /* Locks are here just to force reading of recent values, it is
2137 * enough that the transaction was not committing before we started 2186 * enough that the transaction was not committing before we started
2138 * a transaction adding the inode to orphan list */ 2187 * a transaction adding the inode to orphan list */
2139 spin_lock(&journal->j_state_lock); 2188 read_lock(&journal->j_state_lock);
2140 commit_trans = journal->j_committing_transaction; 2189 commit_trans = journal->j_committing_transaction;
2141 spin_unlock(&journal->j_state_lock); 2190 read_unlock(&journal->j_state_lock);
2142 spin_lock(&journal->j_list_lock); 2191 spin_lock(&journal->j_list_lock);
2143 inode_trans = jinode->i_transaction; 2192 inode_trans = jinode->i_transaction;
2144 spin_unlock(&journal->j_list_lock); 2193 spin_unlock(&journal->j_list_lock);
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 166062a68230..5fd3b5cecda5 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -232,9 +232,7 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
232 return 0; 232 return 0;
233 233
234 fail: 234 fail:
235 make_bad_inode(inode); 235 iget_failed(inode);
236 unlock_new_inode(inode);
237 iput(inode);
238 jffs2_free_raw_inode(ri); 236 jffs2_free_raw_inode(ri);
239 return ret; 237 return ret;
240} 238}
@@ -454,9 +452,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
454 return 0; 452 return 0;
455 453
456 fail: 454 fail:
457 make_bad_inode(inode); 455 iget_failed(inode);
458 unlock_new_inode(inode);
459 iput(inode);
460 return ret; 456 return ret;
461} 457}
462 458
@@ -601,9 +597,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
601 return 0; 597 return 0;
602 598
603 fail: 599 fail:
604 make_bad_inode(inode); 600 iget_failed(inode);
605 unlock_new_inode(inode);
606 iput(inode);
607 return ret; 601 return ret;
608} 602}
609 603
@@ -778,9 +772,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
778 return 0; 772 return 0;
779 773
780 fail: 774 fail:
781 make_bad_inode(inode); 775 iget_failed(inode);
782 unlock_new_inode(inode);
783 iput(inode);
784 return ret; 776 return ret;
785} 777}
786 778
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 459d39d1ea0b..ac0638f04969 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -169,13 +169,13 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
169 mutex_unlock(&f->sem); 169 mutex_unlock(&f->sem);
170 jffs2_complete_reservation(c); 170 jffs2_complete_reservation(c);
171 171
172 /* We have to do the simple_setsize() without f->sem held, since 172 /* We have to do the truncate_setsize() without f->sem held, since
173 some pages may be locked and waiting for it in readpage(). 173 some pages may be locked and waiting for it in readpage().
174 We are protected from a simultaneous write() extending i_size 174 We are protected from a simultaneous write() extending i_size
175 back past iattr->ia_size, because do_truncate() holds the 175 back past iattr->ia_size, because do_truncate() holds the
176 generic inode semaphore. */ 176 generic inode semaphore. */
177 if (ivalid & ATTR_SIZE && inode->i_size > iattr->ia_size) { 177 if (ivalid & ATTR_SIZE && inode->i_size > iattr->ia_size) {
178 simple_setsize(inode, iattr->ia_size); 178 truncate_setsize(inode, iattr->ia_size);
179 inode->i_blocks = (inode->i_size + 511) >> 9; 179 inode->i_blocks = (inode->i_size + 511) >> 9;
180 } 180 }
181 181
@@ -225,7 +225,7 @@ int jffs2_statfs(struct dentry *dentry, struct kstatfs *buf)
225} 225}
226 226
227 227
228void jffs2_clear_inode (struct inode *inode) 228void jffs2_evict_inode (struct inode *inode)
229{ 229{
230 /* We can forget about this inode for now - drop all 230 /* We can forget about this inode for now - drop all
231 * the nodelists associated with it, etc. 231 * the nodelists associated with it, etc.
@@ -233,7 +233,9 @@ void jffs2_clear_inode (struct inode *inode)
233 struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); 233 struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
234 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); 234 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
235 235
236 D1(printk(KERN_DEBUG "jffs2_clear_inode(): ino #%lu mode %o\n", inode->i_ino, inode->i_mode)); 236 D1(printk(KERN_DEBUG "jffs2_evict_inode(): ino #%lu mode %o\n", inode->i_ino, inode->i_mode));
237 truncate_inode_pages(&inode->i_data, 0);
238 end_writeback(inode);
237 jffs2_do_clear_inode(c, f); 239 jffs2_do_clear_inode(c, f);
238} 240}
239 241
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 4791aacf3084..00bae7cc2e48 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -171,7 +171,7 @@ extern const struct inode_operations jffs2_symlink_inode_operations;
171int jffs2_setattr (struct dentry *, struct iattr *); 171int jffs2_setattr (struct dentry *, struct iattr *);
172int jffs2_do_setattr (struct inode *, struct iattr *); 172int jffs2_do_setattr (struct inode *, struct iattr *);
173struct inode *jffs2_iget(struct super_block *, unsigned long); 173struct inode *jffs2_iget(struct super_block *, unsigned long);
174void jffs2_clear_inode (struct inode *); 174void jffs2_evict_inode (struct inode *);
175void jffs2_dirty_inode(struct inode *inode); 175void jffs2_dirty_inode(struct inode *inode);
176struct inode *jffs2_new_inode (struct inode *dir_i, int mode, 176struct inode *jffs2_new_inode (struct inode *dir_i, int mode,
177 struct jffs2_raw_inode *ri); 177 struct jffs2_raw_inode *ri);
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 511e2d609d12..662bba099501 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -135,7 +135,7 @@ static const struct super_operations jffs2_super_operations =
135 .write_super = jffs2_write_super, 135 .write_super = jffs2_write_super,
136 .statfs = jffs2_statfs, 136 .statfs = jffs2_statfs,
137 .remount_fs = jffs2_remount_fs, 137 .remount_fs = jffs2_remount_fs,
138 .clear_inode = jffs2_clear_inode, 138 .evict_inode = jffs2_evict_inode,
139 .dirty_inode = jffs2_dirty_inode, 139 .dirty_inode = jffs2_dirty_inode,
140 .sync_fs = jffs2_sync_fs, 140 .sync_fs = jffs2_sync_fs,
141}; 141};
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index d258e261bdc7..9b572ca40a49 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -588,7 +588,7 @@ static void delete_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *re
588 588
589void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic) 589void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic)
590{ 590{
591 /* It's called from jffs2_clear_inode() on inode removing. 591 /* It's called from jffs2_evict_inode() on inode removing.
592 When an inode with XATTR is removed, those XATTRs must be removed. */ 592 When an inode with XATTR is removed, those XATTRs must be removed. */
593 struct jffs2_xattr_ref *ref, *_ref; 593 struct jffs2_xattr_ref *ref, *_ref;
594 594
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 127263cc8657..c5ce6c1d1ff4 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -17,6 +17,7 @@
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19 19
20#include <linux/mm.h>
20#include <linux/fs.h> 21#include <linux/fs.h>
21#include <linux/quotaops.h> 22#include <linux/quotaops.h>
22#include "jfs_incore.h" 23#include "jfs_incore.h"
@@ -107,11 +108,18 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr)
107 return rc; 108 return rc;
108 } 109 }
109 110
110 rc = inode_setattr(inode, iattr); 111 if ((iattr->ia_valid & ATTR_SIZE) &&
112 iattr->ia_size != i_size_read(inode)) {
113 rc = vmtruncate(inode, iattr->ia_size);
114 if (rc)
115 return rc;
116 }
111 117
112 if (!rc && (iattr->ia_valid & ATTR_MODE)) 118 setattr_copy(inode, iattr);
113 rc = jfs_acl_chmod(inode); 119 mark_inode_dirty(inode);
114 120
121 if (iattr->ia_valid & ATTR_MODE)
122 rc = jfs_acl_chmod(inode);
115 return rc; 123 return rc;
116} 124}
117 125
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index ed9ba6fe04f5..9978803ceedc 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -145,31 +145,32 @@ int jfs_write_inode(struct inode *inode, struct writeback_control *wbc)
145 return 0; 145 return 0;
146} 146}
147 147
148void jfs_delete_inode(struct inode *inode) 148void jfs_evict_inode(struct inode *inode)
149{ 149{
150 jfs_info("In jfs_delete_inode, inode = 0x%p", inode); 150 jfs_info("In jfs_evict_inode, inode = 0x%p", inode);
151 151
152 if (!is_bad_inode(inode)) 152 if (!inode->i_nlink && !is_bad_inode(inode)) {
153 dquot_initialize(inode); 153 dquot_initialize(inode);
154 154
155 if (!is_bad_inode(inode) && 155 if (JFS_IP(inode)->fileset == FILESYSTEM_I) {
156 (JFS_IP(inode)->fileset == FILESYSTEM_I)) { 156 truncate_inode_pages(&inode->i_data, 0);
157 truncate_inode_pages(&inode->i_data, 0);
158 157
159 if (test_cflag(COMMIT_Freewmap, inode)) 158 if (test_cflag(COMMIT_Freewmap, inode))
160 jfs_free_zero_link(inode); 159 jfs_free_zero_link(inode);
161 160
162 diFree(inode); 161 diFree(inode);
163 162
164 /* 163 /*
165 * Free the inode from the quota allocation. 164 * Free the inode from the quota allocation.
166 */ 165 */
167 dquot_initialize(inode); 166 dquot_initialize(inode);
168 dquot_free_inode(inode); 167 dquot_free_inode(inode);
169 dquot_drop(inode); 168 }
169 } else {
170 truncate_inode_pages(&inode->i_data, 0);
170 } 171 }
171 172 end_writeback(inode);
172 clear_inode(inode); 173 dquot_drop(inode);
173} 174}
174 175
175void jfs_dirty_inode(struct inode *inode) 176void jfs_dirty_inode(struct inode *inode)
@@ -303,8 +304,17 @@ static int jfs_write_begin(struct file *file, struct address_space *mapping,
303 loff_t pos, unsigned len, unsigned flags, 304 loff_t pos, unsigned len, unsigned flags,
304 struct page **pagep, void **fsdata) 305 struct page **pagep, void **fsdata)
305{ 306{
306 return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 307 int ret;
308
309 ret = nobh_write_begin(mapping, pos, len, flags, pagep, fsdata,
307 jfs_get_block); 310 jfs_get_block);
311 if (unlikely(ret)) {
312 loff_t isize = mapping->host->i_size;
313 if (pos + len > isize)
314 vmtruncate(mapping->host, isize);
315 }
316
317 return ret;
308} 318}
309 319
310static sector_t jfs_bmap(struct address_space *mapping, sector_t block) 320static sector_t jfs_bmap(struct address_space *mapping, sector_t block)
@@ -317,9 +327,24 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb,
317{ 327{
318 struct file *file = iocb->ki_filp; 328 struct file *file = iocb->ki_filp;
319 struct inode *inode = file->f_mapping->host; 329 struct inode *inode = file->f_mapping->host;
330 ssize_t ret;
320 331
321 return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 332 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
322 offset, nr_segs, jfs_get_block, NULL); 333 offset, nr_segs, jfs_get_block, NULL);
334
335 /*
336 * In case of error extending write may have instantiated a few
337 * blocks outside i_size. Trim these off again.
338 */
339 if (unlikely((rw & WRITE) && ret < 0)) {
340 loff_t isize = i_size_read(inode);
341 loff_t end = offset + iov_length(iov, nr_segs);
342
343 if (end > isize)
344 vmtruncate(inode, isize);
345 }
346
347 return ret;
323} 348}
324 349
325const struct address_space_operations jfs_aops = { 350const struct address_space_operations jfs_aops = {
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index 11042b1f44b5..155e91eff07d 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -27,7 +27,7 @@ extern long jfs_compat_ioctl(struct file *, unsigned int, unsigned long);
27extern struct inode *jfs_iget(struct super_block *, unsigned long); 27extern struct inode *jfs_iget(struct super_block *, unsigned long);
28extern int jfs_commit_inode(struct inode *, int); 28extern int jfs_commit_inode(struct inode *, int);
29extern int jfs_write_inode(struct inode *, struct writeback_control *); 29extern int jfs_write_inode(struct inode *, struct writeback_control *);
30extern void jfs_delete_inode(struct inode *); 30extern void jfs_evict_inode(struct inode *);
31extern void jfs_dirty_inode(struct inode *); 31extern void jfs_dirty_inode(struct inode *);
32extern void jfs_truncate(struct inode *); 32extern void jfs_truncate(struct inode *);
33extern void jfs_truncate_nolock(struct inode *, loff_t); 33extern void jfs_truncate_nolock(struct inode *, loff_t);
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index b38f96bef829..ec8c3e4baca3 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -132,11 +132,6 @@ static void jfs_destroy_inode(struct inode *inode)
132 kmem_cache_free(jfs_inode_cachep, ji); 132 kmem_cache_free(jfs_inode_cachep, ji);
133} 133}
134 134
135static void jfs_clear_inode(struct inode *inode)
136{
137 dquot_drop(inode);
138}
139
140static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf) 135static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf)
141{ 136{
142 struct jfs_sb_info *sbi = JFS_SBI(dentry->d_sb); 137 struct jfs_sb_info *sbi = JFS_SBI(dentry->d_sb);
@@ -765,8 +760,7 @@ static const struct super_operations jfs_super_operations = {
765 .destroy_inode = jfs_destroy_inode, 760 .destroy_inode = jfs_destroy_inode,
766 .dirty_inode = jfs_dirty_inode, 761 .dirty_inode = jfs_dirty_inode,
767 .write_inode = jfs_write_inode, 762 .write_inode = jfs_write_inode,
768 .delete_inode = jfs_delete_inode, 763 .evict_inode = jfs_evict_inode,
769 .clear_inode = jfs_clear_inode,
770 .put_super = jfs_put_super, 764 .put_super = jfs_put_super,
771 .sync_fs = jfs_sync_fs, 765 .sync_fs = jfs_sync_fs,
772 .freeze_fs = jfs_freeze, 766 .freeze_fs = jfs_freeze,
diff --git a/fs/libfs.c b/fs/libfs.c
index dcaf972cbf1b..0a9da95317f7 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -327,77 +327,35 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
327} 327}
328 328
329/** 329/**
330 * simple_setsize - handle core mm and vfs requirements for file size change 330 * simple_setattr - setattr for simple filesystem
331 * @inode: inode
332 * @newsize: new file size
333 *
334 * Returns 0 on success, -error on failure.
335 *
336 * simple_setsize must be called with inode_mutex held.
337 *
338 * simple_setsize will check that the requested new size is OK (see
339 * inode_newsize_ok), and then will perform the necessary i_size update
340 * and pagecache truncation (if necessary). It will be typically be called
341 * from the filesystem's setattr function when ATTR_SIZE is passed in.
342 *
343 * The inode itself must have correct permissions and attributes to allow
344 * i_size to be changed, this function then just checks that the new size
345 * requested is valid.
346 *
347 * In the case of simple in-memory filesystems with inodes stored solely
348 * in the inode cache, and file data in the pagecache, nothing more needs
349 * to be done to satisfy a truncate request. Filesystems with on-disk
350 * blocks for example will need to free them in the case of truncate, in
351 * that case it may be easier not to use simple_setsize (but each of its
352 * components will likely be required at some point to update pagecache
353 * and inode etc).
354 */
355int simple_setsize(struct inode *inode, loff_t newsize)
356{
357 loff_t oldsize;
358 int error;
359
360 error = inode_newsize_ok(inode, newsize);
361 if (error)
362 return error;
363
364 oldsize = inode->i_size;
365 i_size_write(inode, newsize);
366 truncate_pagecache(inode, oldsize, newsize);
367
368 return error;
369}
370EXPORT_SYMBOL(simple_setsize);
371
372/**
373 * simple_setattr - setattr for simple in-memory filesystem
374 * @dentry: dentry 331 * @dentry: dentry
375 * @iattr: iattr structure 332 * @iattr: iattr structure
376 * 333 *
377 * Returns 0 on success, -error on failure. 334 * Returns 0 on success, -error on failure.
378 * 335 *
379 * simple_setattr implements setattr for an in-memory filesystem which 336 * simple_setattr is a simple ->setattr implementation without a proper
380 * does not store its own file data or metadata (eg. uses the page cache 337 * implementation of size changes.
381 * and inode cache as its data store). 338 *
339 * It can either be used for in-memory filesystems or special files
340 * on simple regular filesystems. Anything that needs to change on-disk
341 * or wire state on size changes needs its own setattr method.
382 */ 342 */
383int simple_setattr(struct dentry *dentry, struct iattr *iattr) 343int simple_setattr(struct dentry *dentry, struct iattr *iattr)
384{ 344{
385 struct inode *inode = dentry->d_inode; 345 struct inode *inode = dentry->d_inode;
386 int error; 346 int error;
387 347
348 WARN_ON_ONCE(inode->i_op->truncate);
349
388 error = inode_change_ok(inode, iattr); 350 error = inode_change_ok(inode, iattr);
389 if (error) 351 if (error)
390 return error; 352 return error;
391 353
392 if (iattr->ia_valid & ATTR_SIZE) { 354 if (iattr->ia_valid & ATTR_SIZE)
393 error = simple_setsize(inode, iattr->ia_size); 355 truncate_setsize(inode, iattr->ia_size);
394 if (error) 356 setattr_copy(inode, iattr);
395 return error; 357 mark_inode_dirty(inode);
396 } 358 return 0;
397
398 generic_setattr(inode, iattr);
399
400 return error;
401} 359}
402EXPORT_SYMBOL(simple_setattr); 360EXPORT_SYMBOL(simple_setattr);
403 361
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 72d1893ddd36..675cc49197fe 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -434,8 +434,11 @@ static int __logfs_create(struct inode *dir, struct dentry *dentry,
434 int ret; 434 int ret;
435 435
436 ta = kzalloc(sizeof(*ta), GFP_KERNEL); 436 ta = kzalloc(sizeof(*ta), GFP_KERNEL);
437 if (!ta) 437 if (!ta) {
438 inode->i_nlink--;
439 iput(inode);
438 return -ENOMEM; 440 return -ENOMEM;
441 }
439 442
440 ta->state = CREATE_1; 443 ta->state = CREATE_1;
441 ta->ino = inode->i_ino; 444 ta->ino = inode->i_ino;
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index abe1cafbd4c2..4dd0f7c06e39 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -232,15 +232,19 @@ static int logfs_setattr(struct dentry *dentry, struct iattr *attr)
232 struct inode *inode = dentry->d_inode; 232 struct inode *inode = dentry->d_inode;
233 int err = 0; 233 int err = 0;
234 234
235 if (attr->ia_valid & ATTR_SIZE) 235 err = inode_change_ok(inode, attr);
236 if (err)
237 return err;
238
239 if (attr->ia_valid & ATTR_SIZE) {
236 err = logfs_truncate(inode, attr->ia_size); 240 err = logfs_truncate(inode, attr->ia_size);
237 attr->ia_valid &= ~ATTR_SIZE; 241 if (err)
242 return err;
243 }
238 244
239 if (!err) 245 setattr_copy(inode, attr);
240 err = inode_change_ok(inode, attr); 246 mark_inode_dirty(inode);
241 if (!err) 247 return 0;
242 err = inode_setattr(inode, attr);
243 return err;
244} 248}
245 249
246const struct inode_operations logfs_reg_iops = { 250const struct inode_operations logfs_reg_iops = {
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index f602e230e162..d8c71ece098f 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -235,33 +235,21 @@ static struct inode *logfs_alloc_inode(struct super_block *sb)
235 * purpose is to create a new inode that will not trigger the warning if such 235 * purpose is to create a new inode that will not trigger the warning if such
236 * an inode is still in use. An ugly hack, no doubt. Suggections for 236 * an inode is still in use. An ugly hack, no doubt. Suggections for
237 * improvement are welcome. 237 * improvement are welcome.
238 *
239 * AV: that's what ->put_super() is for...
238 */ 240 */
239struct inode *logfs_new_meta_inode(struct super_block *sb, u64 ino) 241struct inode *logfs_new_meta_inode(struct super_block *sb, u64 ino)
240{ 242{
241 struct inode *inode; 243 struct inode *inode;
242 244
243 inode = logfs_alloc_inode(sb); 245 inode = new_inode(sb);
244 if (!inode) 246 if (!inode)
245 return ERR_PTR(-ENOMEM); 247 return ERR_PTR(-ENOMEM);
246 248
247 inode->i_mode = S_IFREG; 249 inode->i_mode = S_IFREG;
248 inode->i_ino = ino; 250 inode->i_ino = ino;
249 inode->i_sb = sb; 251 inode->i_data.a_ops = &logfs_reg_aops;
250 252 mapping_set_gfp_mask(&inode->i_data, GFP_NOFS);
251 /* This is a blatant copy of alloc_inode code. We'd need alloc_inode
252 * to be nonstatic, alas. */
253 {
254 struct address_space * const mapping = &inode->i_data;
255
256 mapping->a_ops = &logfs_reg_aops;
257 mapping->host = inode;
258 mapping->flags = 0;
259 mapping_set_gfp_mask(mapping, GFP_NOFS);
260 mapping->assoc_mapping = NULL;
261 mapping->backing_dev_info = &default_backing_dev_info;
262 inode->i_mapping = mapping;
263 inode->i_nlink = 1;
264 }
265 253
266 return inode; 254 return inode;
267} 255}
@@ -277,7 +265,7 @@ struct inode *logfs_read_meta_inode(struct super_block *sb, u64 ino)
277 265
278 err = logfs_read_inode(inode); 266 err = logfs_read_inode(inode);
279 if (err) { 267 if (err) {
280 destroy_meta_inode(inode); 268 iput(inode);
281 return ERR_PTR(err); 269 return ERR_PTR(err);
282 } 270 }
283 logfs_inode_setops(inode); 271 logfs_inode_setops(inode);
@@ -298,18 +286,8 @@ static int logfs_write_inode(struct inode *inode, struct writeback_control *wbc)
298 return ret; 286 return ret;
299} 287}
300 288
301void destroy_meta_inode(struct inode *inode)
302{
303 if (inode) {
304 if (inode->i_data.nrpages)
305 truncate_inode_pages(&inode->i_data, 0);
306 logfs_clear_inode(inode);
307 kmem_cache_free(logfs_inode_cache, logfs_inode(inode));
308 }
309}
310
311/* called with inode_lock held */ 289/* called with inode_lock held */
312static void logfs_drop_inode(struct inode *inode) 290static int logfs_drop_inode(struct inode *inode)
313{ 291{
314 struct logfs_super *super = logfs_super(inode->i_sb); 292 struct logfs_super *super = logfs_super(inode->i_sb);
315 struct logfs_inode *li = logfs_inode(inode); 293 struct logfs_inode *li = logfs_inode(inode);
@@ -317,7 +295,7 @@ static void logfs_drop_inode(struct inode *inode)
317 spin_lock(&logfs_inode_lock); 295 spin_lock(&logfs_inode_lock);
318 list_move(&li->li_freeing_list, &super->s_freeing_list); 296 list_move(&li->li_freeing_list, &super->s_freeing_list);
319 spin_unlock(&logfs_inode_lock); 297 spin_unlock(&logfs_inode_lock);
320 generic_drop_inode(inode); 298 return generic_drop_inode(inode);
321} 299}
322 300
323static void logfs_set_ino_generation(struct super_block *sb, 301static void logfs_set_ino_generation(struct super_block *sb,
@@ -384,12 +362,21 @@ static int logfs_sync_fs(struct super_block *sb, int wait)
384 return 0; 362 return 0;
385} 363}
386 364
365static void logfs_put_super(struct super_block *sb)
366{
367 struct logfs_super *super = logfs_super(sb);
368 /* kill the meta-inodes */
369 iput(super->s_master_inode);
370 iput(super->s_segfile_inode);
371 iput(super->s_mapping_inode);
372}
373
387const struct super_operations logfs_super_operations = { 374const struct super_operations logfs_super_operations = {
388 .alloc_inode = logfs_alloc_inode, 375 .alloc_inode = logfs_alloc_inode,
389 .clear_inode = logfs_clear_inode,
390 .delete_inode = logfs_delete_inode,
391 .destroy_inode = logfs_destroy_inode, 376 .destroy_inode = logfs_destroy_inode,
377 .evict_inode = logfs_evict_inode,
392 .drop_inode = logfs_drop_inode, 378 .drop_inode = logfs_drop_inode,
379 .put_super = logfs_put_super,
393 .write_inode = logfs_write_inode, 380 .write_inode = logfs_write_inode,
394 .statfs = logfs_statfs, 381 .statfs = logfs_statfs,
395 .sync_fs = logfs_sync_fs, 382 .sync_fs = logfs_sync_fs,
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index 4b0e0616b357..f46ee8b0e135 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -889,8 +889,6 @@ void logfs_cleanup_journal(struct super_block *sb)
889 struct logfs_super *super = logfs_super(sb); 889 struct logfs_super *super = logfs_super(sb);
890 890
891 btree_grim_visitor32(&super->s_reserved_segments, 0, NULL); 891 btree_grim_visitor32(&super->s_reserved_segments, 0, NULL);
892 destroy_meta_inode(super->s_master_inode);
893 super->s_master_inode = NULL;
894 892
895 kfree(super->s_compressed_je); 893 kfree(super->s_compressed_je);
896 kfree(super->s_je); 894 kfree(super->s_je);
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index c838c4d72111..5e3b72077951 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -525,13 +525,11 @@ struct inode *logfs_new_meta_inode(struct super_block *sb, u64 ino);
525struct inode *logfs_read_meta_inode(struct super_block *sb, u64 ino); 525struct inode *logfs_read_meta_inode(struct super_block *sb, u64 ino);
526int logfs_init_inode_cache(void); 526int logfs_init_inode_cache(void);
527void logfs_destroy_inode_cache(void); 527void logfs_destroy_inode_cache(void);
528void destroy_meta_inode(struct inode *inode);
529void logfs_set_blocks(struct inode *inode, u64 no); 528void logfs_set_blocks(struct inode *inode, u64 no);
530/* these logically belong into inode.c but actually reside in readwrite.c */ 529/* these logically belong into inode.c but actually reside in readwrite.c */
531int logfs_read_inode(struct inode *inode); 530int logfs_read_inode(struct inode *inode);
532int __logfs_write_inode(struct inode *inode, long flags); 531int __logfs_write_inode(struct inode *inode, long flags);
533void logfs_delete_inode(struct inode *inode); 532void logfs_evict_inode(struct inode *inode);
534void logfs_clear_inode(struct inode *inode);
535 533
536/* journal.c */ 534/* journal.c */
537void logfs_write_anchor(struct super_block *sb); 535void logfs_write_anchor(struct super_block *sb);
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 0718d112a1a5..6127baf0e188 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -1972,31 +1972,6 @@ static struct page *inode_to_page(struct inode *inode)
1972 return page; 1972 return page;
1973} 1973}
1974 1974
1975/* Cheaper version of write_inode. All changes are concealed in
1976 * aliases, which are moved back. No write to the medium happens.
1977 */
1978void logfs_clear_inode(struct inode *inode)
1979{
1980 struct super_block *sb = inode->i_sb;
1981 struct logfs_inode *li = logfs_inode(inode);
1982 struct logfs_block *block = li->li_block;
1983 struct page *page;
1984
1985 /* Only deleted files may be dirty at this point */
1986 BUG_ON(inode->i_state & I_DIRTY && inode->i_nlink);
1987 if (!block)
1988 return;
1989 if ((logfs_super(sb)->s_flags & LOGFS_SB_FLAG_SHUTDOWN)) {
1990 block->ops->free_block(inode->i_sb, block);
1991 return;
1992 }
1993
1994 BUG_ON(inode->i_ino < LOGFS_RESERVED_INOS);
1995 page = inode_to_page(inode);
1996 BUG_ON(!page); /* FIXME: Use emergency page */
1997 logfs_put_write_page(page);
1998}
1999
2000static int do_write_inode(struct inode *inode) 1975static int do_write_inode(struct inode *inode)
2001{ 1976{
2002 struct super_block *sb = inode->i_sb; 1977 struct super_block *sb = inode->i_sb;
@@ -2164,18 +2139,40 @@ static int do_delete_inode(struct inode *inode)
2164 * ZOMBIE inodes have already been deleted before and should remain dead, 2139 * ZOMBIE inodes have already been deleted before and should remain dead,
2165 * if it weren't for valid checking. No need to kill them again here. 2140 * if it weren't for valid checking. No need to kill them again here.
2166 */ 2141 */
2167void logfs_delete_inode(struct inode *inode) 2142void logfs_evict_inode(struct inode *inode)
2168{ 2143{
2144 struct super_block *sb = inode->i_sb;
2169 struct logfs_inode *li = logfs_inode(inode); 2145 struct logfs_inode *li = logfs_inode(inode);
2146 struct logfs_block *block = li->li_block;
2147 struct page *page;
2170 2148
2171 if (!(li->li_flags & LOGFS_IF_ZOMBIE)) { 2149 if (!inode->i_nlink) {
2172 li->li_flags |= LOGFS_IF_ZOMBIE; 2150 if (!(li->li_flags & LOGFS_IF_ZOMBIE)) {
2173 if (i_size_read(inode) > 0) 2151 li->li_flags |= LOGFS_IF_ZOMBIE;
2174 logfs_truncate(inode, 0); 2152 if (i_size_read(inode) > 0)
2175 do_delete_inode(inode); 2153 logfs_truncate(inode, 0);
2154 do_delete_inode(inode);
2155 }
2176 } 2156 }
2177 truncate_inode_pages(&inode->i_data, 0); 2157 truncate_inode_pages(&inode->i_data, 0);
2178 clear_inode(inode); 2158 end_writeback(inode);
2159
2160 /* Cheaper version of write_inode. All changes are concealed in
2161 * aliases, which are moved back. No write to the medium happens.
2162 */
2163 /* Only deleted files may be dirty at this point */
2164 BUG_ON(inode->i_state & I_DIRTY && inode->i_nlink);
2165 if (!block)
2166 return;
2167 if ((logfs_super(sb)->s_flags & LOGFS_SB_FLAG_SHUTDOWN)) {
2168 block->ops->free_block(inode->i_sb, block);
2169 return;
2170 }
2171
2172 BUG_ON(inode->i_ino < LOGFS_RESERVED_INOS);
2173 page = inode_to_page(inode);
2174 BUG_ON(!page); /* FIXME: Use emergency page */
2175 logfs_put_write_page(page);
2179} 2176}
2180 2177
2181void btree_write_block(struct logfs_block *block) 2178void btree_write_block(struct logfs_block *block)
@@ -2272,7 +2269,6 @@ void logfs_cleanup_rw(struct super_block *sb)
2272{ 2269{
2273 struct logfs_super *super = logfs_super(sb); 2270 struct logfs_super *super = logfs_super(sb);
2274 2271
2275 destroy_meta_inode(super->s_segfile_inode);
2276 logfs_mempool_destroy(super->s_block_pool); 2272 logfs_mempool_destroy(super->s_block_pool);
2277 logfs_mempool_destroy(super->s_shadow_pool); 2273 logfs_mempool_destroy(super->s_shadow_pool);
2278} 2274}
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index a9657afb70ad..9d5187353255 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -929,5 +929,4 @@ void logfs_cleanup_areas(struct super_block *sb)
929 for_each_area(i) 929 for_each_area(i)
930 free_area(super->s_area[i]); 930 free_area(super->s_area[i]);
931 free_area(super->s_journal_area); 931 free_area(super->s_journal_area);
932 destroy_meta_inode(super->s_mapping_inode);
933} 932}
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index d651e10a1e9c..5336155c5d81 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -342,24 +342,27 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
342 goto fail; 342 goto fail;
343 } 343 }
344 344
345 /* at that point we know that ->put_super() will be called */
345 super->s_erase_page = alloc_pages(GFP_KERNEL, 0); 346 super->s_erase_page = alloc_pages(GFP_KERNEL, 0);
346 if (!super->s_erase_page) 347 if (!super->s_erase_page)
347 goto fail; 348 return -ENOMEM;
348 memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE); 349 memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE);
349 350
350 /* FIXME: check for read-only mounts */ 351 /* FIXME: check for read-only mounts */
351 err = logfs_make_writeable(sb); 352 err = logfs_make_writeable(sb);
352 if (err) 353 if (err) {
353 goto fail1; 354 __free_page(super->s_erase_page);
355 return err;
356 }
354 357
355 log_super("LogFS: Finished mounting\n"); 358 log_super("LogFS: Finished mounting\n");
356 simple_set_mnt(mnt, sb); 359 simple_set_mnt(mnt, sb);
357 return 0; 360 return 0;
358 361
359fail1:
360 __free_page(super->s_erase_page);
361fail: 362fail:
362 iput(logfs_super(sb)->s_master_inode); 363 iput(super->s_master_inode);
364 iput(super->s_segfile_inode);
365 iput(super->s_mapping_inode);
363 return -EIO; 366 return -EIO;
364} 367}
365 368
@@ -580,10 +583,14 @@ int logfs_get_sb_device(struct file_system_type *type, int flags,
580 sb->s_flags |= MS_ACTIVE; 583 sb->s_flags |= MS_ACTIVE;
581 err = logfs_get_sb_final(sb, mnt); 584 err = logfs_get_sb_final(sb, mnt);
582 if (err) 585 if (err)
583 goto err1; 586 deactivate_locked_super(sb);
584 return 0; 587 return err;
585 588
586err1: 589err1:
590 /* no ->s_root, no ->put_super() */
591 iput(super->s_master_inode);
592 iput(super->s_segfile_inode);
593 iput(super->s_mapping_inode);
587 deactivate_locked_super(sb); 594 deactivate_locked_super(sb);
588 return err; 595 return err;
589err0: 596err0:
diff --git a/fs/mbcache.c b/fs/mbcache.c
index e28f21b95344..cf4e6cdfd15b 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -79,15 +79,11 @@ EXPORT_SYMBOL(mb_cache_entry_find_next);
79struct mb_cache { 79struct mb_cache {
80 struct list_head c_cache_list; 80 struct list_head c_cache_list;
81 const char *c_name; 81 const char *c_name;
82 struct mb_cache_op c_op;
83 atomic_t c_entry_count; 82 atomic_t c_entry_count;
84 int c_bucket_bits; 83 int c_bucket_bits;
85#ifndef MB_CACHE_INDEXES_COUNT 84 struct kmem_cache *c_entry_cache;
86 int c_indexes_count;
87#endif
88 struct kmem_cache *c_entry_cache;
89 struct list_head *c_block_hash; 85 struct list_head *c_block_hash;
90 struct list_head *c_indexes_hash[0]; 86 struct list_head *c_index_hash;
91}; 87};
92 88
93 89
@@ -101,16 +97,6 @@ static LIST_HEAD(mb_cache_list);
101static LIST_HEAD(mb_cache_lru_list); 97static LIST_HEAD(mb_cache_lru_list);
102static DEFINE_SPINLOCK(mb_cache_spinlock); 98static DEFINE_SPINLOCK(mb_cache_spinlock);
103 99
104static inline int
105mb_cache_indexes(struct mb_cache *cache)
106{
107#ifdef MB_CACHE_INDEXES_COUNT
108 return MB_CACHE_INDEXES_COUNT;
109#else
110 return cache->c_indexes_count;
111#endif
112}
113
114/* 100/*
115 * What the mbcache registers as to get shrunk dynamically. 101 * What the mbcache registers as to get shrunk dynamically.
116 */ 102 */
@@ -132,12 +118,9 @@ __mb_cache_entry_is_hashed(struct mb_cache_entry *ce)
132static void 118static void
133__mb_cache_entry_unhash(struct mb_cache_entry *ce) 119__mb_cache_entry_unhash(struct mb_cache_entry *ce)
134{ 120{
135 int n;
136
137 if (__mb_cache_entry_is_hashed(ce)) { 121 if (__mb_cache_entry_is_hashed(ce)) {
138 list_del_init(&ce->e_block_list); 122 list_del_init(&ce->e_block_list);
139 for (n=0; n<mb_cache_indexes(ce->e_cache); n++) 123 list_del(&ce->e_index.o_list);
140 list_del(&ce->e_indexes[n].o_list);
141 } 124 }
142} 125}
143 126
@@ -148,16 +131,8 @@ __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask)
148 struct mb_cache *cache = ce->e_cache; 131 struct mb_cache *cache = ce->e_cache;
149 132
150 mb_assert(!(ce->e_used || ce->e_queued)); 133 mb_assert(!(ce->e_used || ce->e_queued));
151 if (cache->c_op.free && cache->c_op.free(ce, gfp_mask)) { 134 kmem_cache_free(cache->c_entry_cache, ce);
152 /* free failed -- put back on the lru list 135 atomic_dec(&cache->c_entry_count);
153 for freeing later. */
154 spin_lock(&mb_cache_spinlock);
155 list_add(&ce->e_lru_list, &mb_cache_lru_list);
156 spin_unlock(&mb_cache_spinlock);
157 } else {
158 kmem_cache_free(cache->c_entry_cache, ce);
159 atomic_dec(&cache->c_entry_count);
160 }
161} 136}
162 137
163 138
@@ -201,22 +176,12 @@ static int
201mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) 176mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
202{ 177{
203 LIST_HEAD(free_list); 178 LIST_HEAD(free_list);
204 struct list_head *l, *ltmp; 179 struct mb_cache *cache;
180 struct mb_cache_entry *entry, *tmp;
205 int count = 0; 181 int count = 0;
206 182
207 spin_lock(&mb_cache_spinlock);
208 list_for_each(l, &mb_cache_list) {
209 struct mb_cache *cache =
210 list_entry(l, struct mb_cache, c_cache_list);
211 mb_debug("cache %s (%d)", cache->c_name,
212 atomic_read(&cache->c_entry_count));
213 count += atomic_read(&cache->c_entry_count);
214 }
215 mb_debug("trying to free %d entries", nr_to_scan); 183 mb_debug("trying to free %d entries", nr_to_scan);
216 if (nr_to_scan == 0) { 184 spin_lock(&mb_cache_spinlock);
217 spin_unlock(&mb_cache_spinlock);
218 goto out;
219 }
220 while (nr_to_scan-- && !list_empty(&mb_cache_lru_list)) { 185 while (nr_to_scan-- && !list_empty(&mb_cache_lru_list)) {
221 struct mb_cache_entry *ce = 186 struct mb_cache_entry *ce =
222 list_entry(mb_cache_lru_list.next, 187 list_entry(mb_cache_lru_list.next,
@@ -224,12 +189,15 @@ mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
224 list_move_tail(&ce->e_lru_list, &free_list); 189 list_move_tail(&ce->e_lru_list, &free_list);
225 __mb_cache_entry_unhash(ce); 190 __mb_cache_entry_unhash(ce);
226 } 191 }
192 list_for_each_entry(cache, &mb_cache_list, c_cache_list) {
193 mb_debug("cache %s (%d)", cache->c_name,
194 atomic_read(&cache->c_entry_count));
195 count += atomic_read(&cache->c_entry_count);
196 }
227 spin_unlock(&mb_cache_spinlock); 197 spin_unlock(&mb_cache_spinlock);
228 list_for_each_safe(l, ltmp, &free_list) { 198 list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) {
229 __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, 199 __mb_cache_entry_forget(entry, gfp_mask);
230 e_lru_list), gfp_mask);
231 } 200 }
232out:
233 return (count / 100) * sysctl_vfs_cache_pressure; 201 return (count / 100) * sysctl_vfs_cache_pressure;
234} 202}
235 203
@@ -243,72 +211,49 @@ out:
243 * memory was available. 211 * memory was available.
244 * 212 *
245 * @name: name of the cache (informal) 213 * @name: name of the cache (informal)
246 * @cache_op: contains the callback called when freeing a cache entry
247 * @entry_size: The size of a cache entry, including
248 * struct mb_cache_entry
249 * @indexes_count: number of additional indexes in the cache. Must equal
250 * MB_CACHE_INDEXES_COUNT if the number of indexes is
251 * hardwired.
252 * @bucket_bits: log2(number of hash buckets) 214 * @bucket_bits: log2(number of hash buckets)
253 */ 215 */
254struct mb_cache * 216struct mb_cache *
255mb_cache_create(const char *name, struct mb_cache_op *cache_op, 217mb_cache_create(const char *name, int bucket_bits)
256 size_t entry_size, int indexes_count, int bucket_bits)
257{ 218{
258 int m=0, n, bucket_count = 1 << bucket_bits; 219 int n, bucket_count = 1 << bucket_bits;
259 struct mb_cache *cache = NULL; 220 struct mb_cache *cache = NULL;
260 221
261 if(entry_size < sizeof(struct mb_cache_entry) + 222 cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL);
262 indexes_count * sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]))
263 return NULL;
264
265 cache = kmalloc(sizeof(struct mb_cache) +
266 indexes_count * sizeof(struct list_head), GFP_KERNEL);
267 if (!cache) 223 if (!cache)
268 goto fail; 224 return NULL;
269 cache->c_name = name; 225 cache->c_name = name;
270 cache->c_op.free = NULL;
271 if (cache_op)
272 cache->c_op.free = cache_op->free;
273 atomic_set(&cache->c_entry_count, 0); 226 atomic_set(&cache->c_entry_count, 0);
274 cache->c_bucket_bits = bucket_bits; 227 cache->c_bucket_bits = bucket_bits;
275#ifdef MB_CACHE_INDEXES_COUNT
276 mb_assert(indexes_count == MB_CACHE_INDEXES_COUNT);
277#else
278 cache->c_indexes_count = indexes_count;
279#endif
280 cache->c_block_hash = kmalloc(bucket_count * sizeof(struct list_head), 228 cache->c_block_hash = kmalloc(bucket_count * sizeof(struct list_head),
281 GFP_KERNEL); 229 GFP_KERNEL);
282 if (!cache->c_block_hash) 230 if (!cache->c_block_hash)
283 goto fail; 231 goto fail;
284 for (n=0; n<bucket_count; n++) 232 for (n=0; n<bucket_count; n++)
285 INIT_LIST_HEAD(&cache->c_block_hash[n]); 233 INIT_LIST_HEAD(&cache->c_block_hash[n]);
286 for (m=0; m<indexes_count; m++) { 234 cache->c_index_hash = kmalloc(bucket_count * sizeof(struct list_head),
287 cache->c_indexes_hash[m] = kmalloc(bucket_count * 235 GFP_KERNEL);
288 sizeof(struct list_head), 236 if (!cache->c_index_hash)
289 GFP_KERNEL); 237 goto fail;
290 if (!cache->c_indexes_hash[m]) 238 for (n=0; n<bucket_count; n++)
291 goto fail; 239 INIT_LIST_HEAD(&cache->c_index_hash[n]);
292 for (n=0; n<bucket_count; n++) 240 cache->c_entry_cache = kmem_cache_create(name,
293 INIT_LIST_HEAD(&cache->c_indexes_hash[m][n]); 241 sizeof(struct mb_cache_entry), 0,
294 }
295 cache->c_entry_cache = kmem_cache_create(name, entry_size, 0,
296 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); 242 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
297 if (!cache->c_entry_cache) 243 if (!cache->c_entry_cache)
298 goto fail; 244 goto fail2;
299 245
300 spin_lock(&mb_cache_spinlock); 246 spin_lock(&mb_cache_spinlock);
301 list_add(&cache->c_cache_list, &mb_cache_list); 247 list_add(&cache->c_cache_list, &mb_cache_list);
302 spin_unlock(&mb_cache_spinlock); 248 spin_unlock(&mb_cache_spinlock);
303 return cache; 249 return cache;
304 250
251fail2:
252 kfree(cache->c_index_hash);
253
305fail: 254fail:
306 if (cache) { 255 kfree(cache->c_block_hash);
307 while (--m >= 0) 256 kfree(cache);
308 kfree(cache->c_indexes_hash[m]);
309 kfree(cache->c_block_hash);
310 kfree(cache);
311 }
312 return NULL; 257 return NULL;
313} 258}
314 259
@@ -357,7 +302,6 @@ mb_cache_destroy(struct mb_cache *cache)
357{ 302{
358 LIST_HEAD(free_list); 303 LIST_HEAD(free_list);
359 struct list_head *l, *ltmp; 304 struct list_head *l, *ltmp;
360 int n;
361 305
362 spin_lock(&mb_cache_spinlock); 306 spin_lock(&mb_cache_spinlock);
363 list_for_each_safe(l, ltmp, &mb_cache_lru_list) { 307 list_for_each_safe(l, ltmp, &mb_cache_lru_list) {
@@ -384,8 +328,7 @@ mb_cache_destroy(struct mb_cache *cache)
384 328
385 kmem_cache_destroy(cache->c_entry_cache); 329 kmem_cache_destroy(cache->c_entry_cache);
386 330
387 for (n=0; n < mb_cache_indexes(cache); n++) 331 kfree(cache->c_index_hash);
388 kfree(cache->c_indexes_hash[n]);
389 kfree(cache->c_block_hash); 332 kfree(cache->c_block_hash);
390 kfree(cache); 333 kfree(cache);
391} 334}
@@ -429,17 +372,16 @@ mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags)
429 * 372 *
430 * @bdev: device the cache entry belongs to 373 * @bdev: device the cache entry belongs to
431 * @block: block number 374 * @block: block number
432 * @keys: array of additional keys. There must be indexes_count entries 375 * @key: lookup key
433 * in the array (as specified when creating the cache).
434 */ 376 */
435int 377int
436mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev, 378mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev,
437 sector_t block, unsigned int keys[]) 379 sector_t block, unsigned int key)
438{ 380{
439 struct mb_cache *cache = ce->e_cache; 381 struct mb_cache *cache = ce->e_cache;
440 unsigned int bucket; 382 unsigned int bucket;
441 struct list_head *l; 383 struct list_head *l;
442 int error = -EBUSY, n; 384 int error = -EBUSY;
443 385
444 bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), 386 bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
445 cache->c_bucket_bits); 387 cache->c_bucket_bits);
@@ -454,12 +396,9 @@ mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev,
454 ce->e_bdev = bdev; 396 ce->e_bdev = bdev;
455 ce->e_block = block; 397 ce->e_block = block;
456 list_add(&ce->e_block_list, &cache->c_block_hash[bucket]); 398 list_add(&ce->e_block_list, &cache->c_block_hash[bucket]);
457 for (n=0; n<mb_cache_indexes(cache); n++) { 399 ce->e_index.o_key = key;
458 ce->e_indexes[n].o_key = keys[n]; 400 bucket = hash_long(key, cache->c_bucket_bits);
459 bucket = hash_long(keys[n], cache->c_bucket_bits); 401 list_add(&ce->e_index.o_list, &cache->c_index_hash[bucket]);
460 list_add(&ce->e_indexes[n].o_list,
461 &cache->c_indexes_hash[n][bucket]);
462 }
463 error = 0; 402 error = 0;
464out: 403out:
465 spin_unlock(&mb_cache_spinlock); 404 spin_unlock(&mb_cache_spinlock);
@@ -555,13 +494,12 @@ cleanup:
555 494
556static struct mb_cache_entry * 495static struct mb_cache_entry *
557__mb_cache_entry_find(struct list_head *l, struct list_head *head, 496__mb_cache_entry_find(struct list_head *l, struct list_head *head,
558 int index, struct block_device *bdev, unsigned int key) 497 struct block_device *bdev, unsigned int key)
559{ 498{
560 while (l != head) { 499 while (l != head) {
561 struct mb_cache_entry *ce = 500 struct mb_cache_entry *ce =
562 list_entry(l, struct mb_cache_entry, 501 list_entry(l, struct mb_cache_entry, e_index.o_list);
563 e_indexes[index].o_list); 502 if (ce->e_bdev == bdev && ce->e_index.o_key == key) {
564 if (ce->e_bdev == bdev && ce->e_indexes[index].o_key == key) {
565 DEFINE_WAIT(wait); 503 DEFINE_WAIT(wait);
566 504
567 if (!list_empty(&ce->e_lru_list)) 505 if (!list_empty(&ce->e_lru_list))
@@ -603,23 +541,20 @@ __mb_cache_entry_find(struct list_head *l, struct list_head *head,
603 * returned cache entry is locked for shared access ("multiple readers"). 541 * returned cache entry is locked for shared access ("multiple readers").
604 * 542 *
605 * @cache: the cache to search 543 * @cache: the cache to search
606 * @index: the number of the additonal index to search (0<=index<indexes_count)
607 * @bdev: the device the cache entry should belong to 544 * @bdev: the device the cache entry should belong to
608 * @key: the key in the index 545 * @key: the key in the index
609 */ 546 */
610struct mb_cache_entry * 547struct mb_cache_entry *
611mb_cache_entry_find_first(struct mb_cache *cache, int index, 548mb_cache_entry_find_first(struct mb_cache *cache, struct block_device *bdev,
612 struct block_device *bdev, unsigned int key) 549 unsigned int key)
613{ 550{
614 unsigned int bucket = hash_long(key, cache->c_bucket_bits); 551 unsigned int bucket = hash_long(key, cache->c_bucket_bits);
615 struct list_head *l; 552 struct list_head *l;
616 struct mb_cache_entry *ce; 553 struct mb_cache_entry *ce;
617 554
618 mb_assert(index < mb_cache_indexes(cache));
619 spin_lock(&mb_cache_spinlock); 555 spin_lock(&mb_cache_spinlock);
620 l = cache->c_indexes_hash[index][bucket].next; 556 l = cache->c_index_hash[bucket].next;
621 ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket], 557 ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key);
622 index, bdev, key);
623 spin_unlock(&mb_cache_spinlock); 558 spin_unlock(&mb_cache_spinlock);
624 return ce; 559 return ce;
625} 560}
@@ -640,12 +575,11 @@ mb_cache_entry_find_first(struct mb_cache *cache, int index,
640 * } 575 * }
641 * 576 *
642 * @prev: The previous match 577 * @prev: The previous match
643 * @index: the number of the additonal index to search (0<=index<indexes_count)
644 * @bdev: the device the cache entry should belong to 578 * @bdev: the device the cache entry should belong to
645 * @key: the key in the index 579 * @key: the key in the index
646 */ 580 */
647struct mb_cache_entry * 581struct mb_cache_entry *
648mb_cache_entry_find_next(struct mb_cache_entry *prev, int index, 582mb_cache_entry_find_next(struct mb_cache_entry *prev,
649 struct block_device *bdev, unsigned int key) 583 struct block_device *bdev, unsigned int key)
650{ 584{
651 struct mb_cache *cache = prev->e_cache; 585 struct mb_cache *cache = prev->e_cache;
@@ -653,11 +587,9 @@ mb_cache_entry_find_next(struct mb_cache_entry *prev, int index,
653 struct list_head *l; 587 struct list_head *l;
654 struct mb_cache_entry *ce; 588 struct mb_cache_entry *ce;
655 589
656 mb_assert(index < mb_cache_indexes(cache));
657 spin_lock(&mb_cache_spinlock); 590 spin_lock(&mb_cache_spinlock);
658 l = prev->e_indexes[index].o_list.next; 591 l = prev->e_index.o_list.next;
659 ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket], 592 ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key);
660 index, bdev, key);
661 __mb_cache_entry_release_unlock(prev); 593 __mb_cache_entry_release_unlock(prev);
662 return ce; 594 return ce;
663} 595}
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index 482779fe4e7c..3f32bcb0d9bd 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -200,13 +200,13 @@ void minix_free_inode(struct inode * inode)
200 ino = inode->i_ino; 200 ino = inode->i_ino;
201 if (ino < 1 || ino > sbi->s_ninodes) { 201 if (ino < 1 || ino > sbi->s_ninodes) {
202 printk("minix_free_inode: inode 0 or nonexistent inode\n"); 202 printk("minix_free_inode: inode 0 or nonexistent inode\n");
203 goto out; 203 return;
204 } 204 }
205 bit = ino & ((1<<k) - 1); 205 bit = ino & ((1<<k) - 1);
206 ino >>= k; 206 ino >>= k;
207 if (ino >= sbi->s_imap_blocks) { 207 if (ino >= sbi->s_imap_blocks) {
208 printk("minix_free_inode: nonexistent imap in superblock\n"); 208 printk("minix_free_inode: nonexistent imap in superblock\n");
209 goto out; 209 return;
210 } 210 }
211 211
212 minix_clear_inode(inode); /* clear on-disk copy */ 212 minix_clear_inode(inode); /* clear on-disk copy */
@@ -217,8 +217,6 @@ void minix_free_inode(struct inode * inode)
217 printk("minix_free_inode: bit %lu already cleared\n", bit); 217 printk("minix_free_inode: bit %lu already cleared\n", bit);
218 spin_unlock(&bitmap_lock); 218 spin_unlock(&bitmap_lock);
219 mark_buffer_dirty(bh); 219 mark_buffer_dirty(bh);
220 out:
221 clear_inode(inode); /* clear in-memory copy */
222} 220}
223 221
224struct inode *minix_new_inode(const struct inode *dir, int mode, int *error) 222struct inode *minix_new_inode(const struct inode *dir, int mode, int *error)
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index 1dbf921ca44b..085a9262c692 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -271,8 +271,7 @@ int minix_add_link(struct dentry *dentry, struct inode *inode)
271 271
272got_it: 272got_it:
273 pos = page_offset(page) + p - (char *)page_address(page); 273 pos = page_offset(page) + p - (char *)page_address(page);
274 err = __minix_write_begin(NULL, page->mapping, pos, sbi->s_dirsize, 274 err = minix_prepare_chunk(page, pos, sbi->s_dirsize);
275 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
276 if (err) 275 if (err)
277 goto out_unlock; 276 goto out_unlock;
278 memcpy (namx, name, namelen); 277 memcpy (namx, name, namelen);
@@ -297,8 +296,7 @@ out_unlock:
297 296
298int minix_delete_entry(struct minix_dir_entry *de, struct page *page) 297int minix_delete_entry(struct minix_dir_entry *de, struct page *page)
299{ 298{
300 struct address_space *mapping = page->mapping; 299 struct inode *inode = page->mapping->host;
301 struct inode *inode = (struct inode*)mapping->host;
302 char *kaddr = page_address(page); 300 char *kaddr = page_address(page);
303 loff_t pos = page_offset(page) + (char*)de - kaddr; 301 loff_t pos = page_offset(page) + (char*)de - kaddr;
304 struct minix_sb_info *sbi = minix_sb(inode->i_sb); 302 struct minix_sb_info *sbi = minix_sb(inode->i_sb);
@@ -306,8 +304,7 @@ int minix_delete_entry(struct minix_dir_entry *de, struct page *page)
306 int err; 304 int err;
307 305
308 lock_page(page); 306 lock_page(page);
309 err = __minix_write_begin(NULL, mapping, pos, len, 307 err = minix_prepare_chunk(page, pos, len);
310 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
311 if (err == 0) { 308 if (err == 0) {
312 if (sbi->s_version == MINIX_V3) 309 if (sbi->s_version == MINIX_V3)
313 ((minix3_dirent *) de)->inode = 0; 310 ((minix3_dirent *) de)->inode = 0;
@@ -325,16 +322,14 @@ int minix_delete_entry(struct minix_dir_entry *de, struct page *page)
325 322
326int minix_make_empty(struct inode *inode, struct inode *dir) 323int minix_make_empty(struct inode *inode, struct inode *dir)
327{ 324{
328 struct address_space *mapping = inode->i_mapping; 325 struct page *page = grab_cache_page(inode->i_mapping, 0);
329 struct page *page = grab_cache_page(mapping, 0);
330 struct minix_sb_info *sbi = minix_sb(inode->i_sb); 326 struct minix_sb_info *sbi = minix_sb(inode->i_sb);
331 char *kaddr; 327 char *kaddr;
332 int err; 328 int err;
333 329
334 if (!page) 330 if (!page)
335 return -ENOMEM; 331 return -ENOMEM;
336 err = __minix_write_begin(NULL, mapping, 0, 2 * sbi->s_dirsize, 332 err = minix_prepare_chunk(page, 0, 2 * sbi->s_dirsize);
337 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
338 if (err) { 333 if (err) {
339 unlock_page(page); 334 unlock_page(page);
340 goto fail; 335 goto fail;
@@ -425,8 +420,7 @@ not_empty:
425void minix_set_link(struct minix_dir_entry *de, struct page *page, 420void minix_set_link(struct minix_dir_entry *de, struct page *page,
426 struct inode *inode) 421 struct inode *inode)
427{ 422{
428 struct address_space *mapping = page->mapping; 423 struct inode *dir = page->mapping->host;
429 struct inode *dir = mapping->host;
430 struct minix_sb_info *sbi = minix_sb(dir->i_sb); 424 struct minix_sb_info *sbi = minix_sb(dir->i_sb);
431 loff_t pos = page_offset(page) + 425 loff_t pos = page_offset(page) +
432 (char *)de-(char*)page_address(page); 426 (char *)de-(char*)page_address(page);
@@ -434,8 +428,7 @@ void minix_set_link(struct minix_dir_entry *de, struct page *page,
434 428
435 lock_page(page); 429 lock_page(page);
436 430
437 err = __minix_write_begin(NULL, mapping, pos, sbi->s_dirsize, 431 err = minix_prepare_chunk(page, pos, sbi->s_dirsize);
438 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
439 if (err == 0) { 432 if (err == 0) {
440 if (sbi->s_version == MINIX_V3) 433 if (sbi->s_version == MINIX_V3)
441 ((minix3_dirent *) de)->inode = inode->i_ino; 434 ((minix3_dirent *) de)->inode = inode->i_ino;
diff --git a/fs/minix/file.c b/fs/minix/file.c
index d5320ff23faf..4493ce695ab8 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -23,7 +23,29 @@ const struct file_operations minix_file_operations = {
23 .splice_read = generic_file_splice_read, 23 .splice_read = generic_file_splice_read,
24}; 24};
25 25
26static int minix_setattr(struct dentry *dentry, struct iattr *attr)
27{
28 struct inode *inode = dentry->d_inode;
29 int error;
30
31 error = inode_change_ok(inode, attr);
32 if (error)
33 return error;
34
35 if ((attr->ia_valid & ATTR_SIZE) &&
36 attr->ia_size != i_size_read(inode)) {
37 error = vmtruncate(inode, attr->ia_size);
38 if (error)
39 return error;
40 }
41
42 setattr_copy(inode, attr);
43 mark_inode_dirty(inode);
44 return 0;
45}
46
26const struct inode_operations minix_file_inode_operations = { 47const struct inode_operations minix_file_inode_operations = {
27 .truncate = minix_truncate, 48 .truncate = minix_truncate,
49 .setattr = minix_setattr,
28 .getattr = minix_getattr, 50 .getattr = minix_getattr,
29}; 51};
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 756f8c93780c..e39d6bf2e8fb 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -24,12 +24,17 @@ static int minix_write_inode(struct inode *inode,
24static int minix_statfs(struct dentry *dentry, struct kstatfs *buf); 24static int minix_statfs(struct dentry *dentry, struct kstatfs *buf);
25static int minix_remount (struct super_block * sb, int * flags, char * data); 25static int minix_remount (struct super_block * sb, int * flags, char * data);
26 26
27static void minix_delete_inode(struct inode *inode) 27static void minix_evict_inode(struct inode *inode)
28{ 28{
29 truncate_inode_pages(&inode->i_data, 0); 29 truncate_inode_pages(&inode->i_data, 0);
30 inode->i_size = 0; 30 if (!inode->i_nlink) {
31 minix_truncate(inode); 31 inode->i_size = 0;
32 minix_free_inode(inode); 32 minix_truncate(inode);
33 }
34 invalidate_inode_buffers(inode);
35 end_writeback(inode);
36 if (!inode->i_nlink)
37 minix_free_inode(inode);
33} 38}
34 39
35static void minix_put_super(struct super_block *sb) 40static void minix_put_super(struct super_block *sb)
@@ -96,7 +101,7 @@ static const struct super_operations minix_sops = {
96 .alloc_inode = minix_alloc_inode, 101 .alloc_inode = minix_alloc_inode,
97 .destroy_inode = minix_destroy_inode, 102 .destroy_inode = minix_destroy_inode,
98 .write_inode = minix_write_inode, 103 .write_inode = minix_write_inode,
99 .delete_inode = minix_delete_inode, 104 .evict_inode = minix_evict_inode,
100 .put_super = minix_put_super, 105 .put_super = minix_put_super,
101 .statfs = minix_statfs, 106 .statfs = minix_statfs,
102 .remount_fs = minix_remount, 107 .remount_fs = minix_remount,
@@ -357,20 +362,26 @@ static int minix_readpage(struct file *file, struct page *page)
357 return block_read_full_page(page,minix_get_block); 362 return block_read_full_page(page,minix_get_block);
358} 363}
359 364
360int __minix_write_begin(struct file *file, struct address_space *mapping, 365int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len)
361 loff_t pos, unsigned len, unsigned flags,
362 struct page **pagep, void **fsdata)
363{ 366{
364 return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 367 return __block_write_begin(page, pos, len, minix_get_block);
365 minix_get_block);
366} 368}
367 369
368static int minix_write_begin(struct file *file, struct address_space *mapping, 370static int minix_write_begin(struct file *file, struct address_space *mapping,
369 loff_t pos, unsigned len, unsigned flags, 371 loff_t pos, unsigned len, unsigned flags,
370 struct page **pagep, void **fsdata) 372 struct page **pagep, void **fsdata)
371{ 373{
372 *pagep = NULL; 374 int ret;
373 return __minix_write_begin(file, mapping, pos, len, flags, pagep, fsdata); 375
376 ret = block_write_begin(mapping, pos, len, flags, pagep,
377 minix_get_block);
378 if (unlikely(ret)) {
379 loff_t isize = mapping->host->i_size;
380 if (pos + len > isize)
381 vmtruncate(mapping->host, isize);
382 }
383
384 return ret;
374} 385}
375 386
376static sector_t minix_bmap(struct address_space *mapping, sector_t block) 387static sector_t minix_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 111f34ee9e3b..407b1c84911e 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -53,9 +53,7 @@ extern int minix_new_block(struct inode * inode);
53extern void minix_free_block(struct inode *inode, unsigned long block); 53extern void minix_free_block(struct inode *inode, unsigned long block);
54extern unsigned long minix_count_free_blocks(struct minix_sb_info *sbi); 54extern unsigned long minix_count_free_blocks(struct minix_sb_info *sbi);
55extern int minix_getattr(struct vfsmount *, struct dentry *, struct kstat *); 55extern int minix_getattr(struct vfsmount *, struct dentry *, struct kstat *);
56extern int __minix_write_begin(struct file *file, struct address_space *mapping, 56extern int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len);
57 loff_t pos, unsigned len, unsigned flags,
58 struct page **pagep, void **fsdata);
59 57
60extern void V1_minix_truncate(struct inode *); 58extern void V1_minix_truncate(struct inode *);
61extern void V2_minix_truncate(struct inode *); 59extern void V2_minix_truncate(struct inode *);
diff --git a/fs/namei.c b/fs/namei.c
index 3479b176a4cd..13ff4abdbdca 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -282,8 +282,7 @@ int inode_permission(struct inode *inode, int mask)
282 if (retval) 282 if (retval)
283 return retval; 283 return retval;
284 284
285 return security_inode_permission(inode, 285 return security_inode_permission(inode, mask);
286 mask & (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND));
287} 286}
288 287
289/** 288/**
@@ -1484,8 +1483,7 @@ static int handle_truncate(struct path *path)
1484 */ 1483 */
1485 error = locks_verify_locked(inode); 1484 error = locks_verify_locked(inode);
1486 if (!error) 1485 if (!error)
1487 error = security_path_truncate(path, 0, 1486 error = security_path_truncate(path);
1488 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
1489 if (!error) { 1487 if (!error) {
1490 error = do_truncate(path->dentry, 0, 1488 error = do_truncate(path->dentry, 0,
1491 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN, 1489 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
diff --git a/fs/namespace.c b/fs/namespace.c
index 1969d6b2571e..66c4f7e781cb 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1989,7 +1989,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
1989 if (flags & MS_RDONLY) 1989 if (flags & MS_RDONLY)
1990 mnt_flags |= MNT_READONLY; 1990 mnt_flags |= MNT_READONLY;
1991 1991
1992 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | 1992 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
1993 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | 1993 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
1994 MS_STRICTATIME); 1994 MS_STRICTATIME);
1995 1995
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index fa3385154023..b4de38cf49f5 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -43,7 +43,7 @@
43#define NCP_DEFAULT_TIME_OUT 10 43#define NCP_DEFAULT_TIME_OUT 10
44#define NCP_DEFAULT_RETRY_COUNT 20 44#define NCP_DEFAULT_RETRY_COUNT 20
45 45
46static void ncp_delete_inode(struct inode *); 46static void ncp_evict_inode(struct inode *);
47static void ncp_put_super(struct super_block *); 47static void ncp_put_super(struct super_block *);
48static int ncp_statfs(struct dentry *, struct kstatfs *); 48static int ncp_statfs(struct dentry *, struct kstatfs *);
49static int ncp_show_options(struct seq_file *, struct vfsmount *); 49static int ncp_show_options(struct seq_file *, struct vfsmount *);
@@ -100,7 +100,7 @@ static const struct super_operations ncp_sops =
100 .alloc_inode = ncp_alloc_inode, 100 .alloc_inode = ncp_alloc_inode,
101 .destroy_inode = ncp_destroy_inode, 101 .destroy_inode = ncp_destroy_inode,
102 .drop_inode = generic_delete_inode, 102 .drop_inode = generic_delete_inode,
103 .delete_inode = ncp_delete_inode, 103 .evict_inode = ncp_evict_inode,
104 .put_super = ncp_put_super, 104 .put_super = ncp_put_super,
105 .statfs = ncp_statfs, 105 .statfs = ncp_statfs,
106 .remount_fs = ncp_remount, 106 .remount_fs = ncp_remount,
@@ -282,19 +282,19 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info)
282} 282}
283 283
284static void 284static void
285ncp_delete_inode(struct inode *inode) 285ncp_evict_inode(struct inode *inode)
286{ 286{
287 truncate_inode_pages(&inode->i_data, 0); 287 truncate_inode_pages(&inode->i_data, 0);
288 end_writeback(inode);
288 289
289 if (S_ISDIR(inode->i_mode)) { 290 if (S_ISDIR(inode->i_mode)) {
290 DDPRINTK("ncp_delete_inode: put directory %ld\n", inode->i_ino); 291 DDPRINTK("ncp_evict_inode: put directory %ld\n", inode->i_ino);
291 } 292 }
292 293
293 if (ncp_make_closed(inode) != 0) { 294 if (ncp_make_closed(inode) != 0) {
294 /* We can't do anything but complain. */ 295 /* We can't do anything but complain. */
295 printk(KERN_ERR "ncp_delete_inode: could not close\n"); 296 printk(KERN_ERR "ncp_evict_inode: could not close\n");
296 } 297 }
297 clear_inode(inode);
298} 298}
299 299
300static void ncp_stop_tasks(struct ncp_server *server) { 300static void ncp_stop_tasks(struct ncp_server *server) {
@@ -728,8 +728,8 @@ out_fput:
728out_bdi: 728out_bdi:
729 /* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>: 729 /* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>:
730 * 730 *
731 * The previously used put_filp(ncp_filp); was bogous, since 731 * The previously used put_filp(ncp_filp); was bogus, since
732 * it doesn't proper unlocking. 732 * it doesn't perform proper unlocking.
733 */ 733 */
734 fput(ncp_filp); 734 fput(ncp_filp);
735out: 735out:
@@ -924,9 +924,8 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr)
924 tmpattr.ia_valid = ATTR_MODE; 924 tmpattr.ia_valid = ATTR_MODE;
925 tmpattr.ia_mode = attr->ia_mode; 925 tmpattr.ia_mode = attr->ia_mode;
926 926
927 result = inode_setattr(inode, &tmpattr); 927 setattr_copy(inode, &tmpattr);
928 if (result) 928 mark_inode_dirty(inode);
929 goto out;
930 } 929 }
931 } 930 }
932#endif 931#endif
@@ -954,15 +953,12 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr)
954 result = ncp_make_closed(inode); 953 result = ncp_make_closed(inode);
955 if (result) 954 if (result)
956 goto out; 955 goto out;
957 { 956
958 struct iattr tmpattr; 957 if (attr->ia_size != i_size_read(inode)) {
959 958 result = vmtruncate(inode, attr->ia_size);
960 tmpattr.ia_valid = ATTR_SIZE;
961 tmpattr.ia_size = attr->ia_size;
962
963 result = inode_setattr(inode, &tmpattr);
964 if (result) 959 if (result)
965 goto out; 960 goto out;
961 mark_inode_dirty(inode);
966 } 962 }
967 } 963 }
968 if ((attr->ia_valid & ATTR_CTIME) != 0) { 964 if ((attr->ia_valid & ATTR_CTIME) != 0) {
@@ -1002,8 +998,12 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr)
1002 NCP_FINFO(inode)->nwattr = info.attributes; 998 NCP_FINFO(inode)->nwattr = info.attributes;
1003#endif 999#endif
1004 } 1000 }
1005 if (!result) 1001 if (result)
1006 result = inode_setattr(inode, attr); 1002 goto out;
1003
1004 setattr_copy(inode, attr);
1005 mark_inode_dirty(inode);
1006
1007out: 1007out:
1008 unlock_kernel(); 1008 unlock_kernel();
1009 return result; 1009 return result;
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index a43d07e7b924..cc1bb33b59b8 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -61,8 +61,8 @@ config NFS_V3_ACL
61 If unsure, say N. 61 If unsure, say N.
62 62
63config NFS_V4 63config NFS_V4
64 bool "NFS client support for NFS version 4 (EXPERIMENTAL)" 64 bool "NFS client support for NFS version 4"
65 depends on NFS_FS && EXPERIMENTAL 65 depends on NFS_FS
66 select RPCSEC_GSS_KRB5 66 select RPCSEC_GSS_KRB5
67 help 67 help
68 This option enables support for version 4 of the NFS protocol 68 This option enables support for version 4 of the NFS protocol
@@ -72,16 +72,16 @@ config NFS_V4
72 space programs which can be found in the Linux nfs-utils package, 72 space programs which can be found in the Linux nfs-utils package,
73 available from http://linux-nfs.org/. 73 available from http://linux-nfs.org/.
74 74
75 If unsure, say N. 75 If unsure, say Y.
76 76
77config NFS_V4_1 77config NFS_V4_1
78 bool "NFS client support for NFSv4.1 (DEVELOPER ONLY)" 78 bool "NFS client support for NFSv4.1 (EXPERIMENTAL)"
79 depends on NFS_V4 && EXPERIMENTAL 79 depends on NFS_V4 && EXPERIMENTAL
80 help 80 help
81 This option enables support for minor version 1 of the NFSv4 protocol 81 This option enables support for minor version 1 of the NFSv4 protocol
82 (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client. 82 (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client.
83 83
84 Unless you're an NFS developer, say N. 84 If unsure, say N.
85 85
86config ROOT_NFS 86config ROOT_NFS
87 bool "Root file system on NFS" 87 bool "Root file system on NFS"
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index a08770a7e857..930d10fecdaf 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -37,8 +37,8 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *
37 if (inode == NULL) 37 if (inode == NULL)
38 goto out_putclient; 38 goto out_putclient;
39 nfsi = NFS_I(inode); 39 nfsi = NFS_I(inode);
40 down_read(&nfsi->rwsem); 40 rcu_read_lock();
41 delegation = nfsi->delegation; 41 delegation = rcu_dereference(nfsi->delegation);
42 if (delegation == NULL || (delegation->type & FMODE_WRITE) == 0) 42 if (delegation == NULL || (delegation->type & FMODE_WRITE) == 0)
43 goto out_iput; 43 goto out_iput;
44 res->size = i_size_read(inode); 44 res->size = i_size_read(inode);
@@ -53,7 +53,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *
53 args->bitmap[1]; 53 args->bitmap[1];
54 res->status = 0; 54 res->status = 0;
55out_iput: 55out_iput:
56 up_read(&nfsi->rwsem); 56 rcu_read_unlock();
57 iput(inode); 57 iput(inode);
58out_putclient: 58out_putclient:
59 nfs_put_client(clp); 59 nfs_put_client(clp);
@@ -62,16 +62,6 @@ out:
62 return res->status; 62 return res->status;
63} 63}
64 64
65static int (*nfs_validate_delegation_stateid(struct nfs_client *clp))(struct nfs_delegation *, const nfs4_stateid *)
66{
67#if defined(CONFIG_NFS_V4_1)
68 if (clp->cl_minorversion > 0)
69 return nfs41_validate_delegation_stateid;
70#endif
71 return nfs4_validate_delegation_stateid;
72}
73
74
75__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy) 65__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
76{ 66{
77 struct nfs_client *clp; 67 struct nfs_client *clp;
@@ -92,8 +82,7 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
92 inode = nfs_delegation_find_inode(clp, &args->fh); 82 inode = nfs_delegation_find_inode(clp, &args->fh);
93 if (inode != NULL) { 83 if (inode != NULL) {
94 /* Set up a helper thread to actually return the delegation */ 84 /* Set up a helper thread to actually return the delegation */
95 switch (nfs_async_inode_return_delegation(inode, &args->stateid, 85 switch (nfs_async_inode_return_delegation(inode, &args->stateid)) {
96 nfs_validate_delegation_stateid(clp))) {
97 case 0: 86 case 0:
98 res = 0; 87 res = 0;
99 break; 88 break;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index d25b5257b7a1..4e7df2adb212 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -150,6 +150,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
150 clp->cl_boot_time = CURRENT_TIME; 150 clp->cl_boot_time = CURRENT_TIME;
151 clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; 151 clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
152 clp->cl_minorversion = cl_init->minorversion; 152 clp->cl_minorversion = cl_init->minorversion;
153 clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
153#endif 154#endif
154 cred = rpc_lookup_machine_cred(); 155 cred = rpc_lookup_machine_cred();
155 if (!IS_ERR(cred)) 156 if (!IS_ERR(cred))
@@ -178,7 +179,7 @@ static void nfs4_clear_client_minor_version(struct nfs_client *clp)
178 clp->cl_session = NULL; 179 clp->cl_session = NULL;
179 } 180 }
180 181
181 clp->cl_call_sync = _nfs4_call_sync; 182 clp->cl_mvops = nfs_v4_minor_ops[0];
182#endif /* CONFIG_NFS_V4_1 */ 183#endif /* CONFIG_NFS_V4_1 */
183} 184}
184 185
@@ -188,7 +189,7 @@ static void nfs4_clear_client_minor_version(struct nfs_client *clp)
188static void nfs4_destroy_callback(struct nfs_client *clp) 189static void nfs4_destroy_callback(struct nfs_client *clp)
189{ 190{
190 if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) 191 if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
191 nfs_callback_down(clp->cl_minorversion); 192 nfs_callback_down(clp->cl_mvops->minor_version);
192} 193}
193 194
194static void nfs4_shutdown_client(struct nfs_client *clp) 195static void nfs4_shutdown_client(struct nfs_client *clp)
@@ -1126,7 +1127,7 @@ static int nfs4_init_callback(struct nfs_client *clp)
1126 return error; 1127 return error;
1127 } 1128 }
1128 1129
1129 error = nfs_callback_up(clp->cl_minorversion, 1130 error = nfs_callback_up(clp->cl_mvops->minor_version,
1130 clp->cl_rpcclient->cl_xprt); 1131 clp->cl_rpcclient->cl_xprt);
1131 if (error < 0) { 1132 if (error < 0) {
1132 dprintk("%s: failed to start callback. Error = %d\n", 1133 dprintk("%s: failed to start callback. Error = %d\n",
@@ -1143,10 +1144,8 @@ static int nfs4_init_callback(struct nfs_client *clp)
1143 */ 1144 */
1144static int nfs4_init_client_minor_version(struct nfs_client *clp) 1145static int nfs4_init_client_minor_version(struct nfs_client *clp)
1145{ 1146{
1146 clp->cl_call_sync = _nfs4_call_sync;
1147
1148#if defined(CONFIG_NFS_V4_1) 1147#if defined(CONFIG_NFS_V4_1)
1149 if (clp->cl_minorversion) { 1148 if (clp->cl_mvops->minor_version) {
1150 struct nfs4_session *session = NULL; 1149 struct nfs4_session *session = NULL;
1151 /* 1150 /*
1152 * Create the session and mark it expired. 1151 * Create the session and mark it expired.
@@ -1158,7 +1157,13 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp)
1158 return -ENOMEM; 1157 return -ENOMEM;
1159 1158
1160 clp->cl_session = session; 1159 clp->cl_session = session;
1161 clp->cl_call_sync = _nfs4_call_sync_session; 1160 /*
1161 * The create session reply races with the server back
1162 * channel probe. Mark the client NFS_CS_SESSION_INITING
1163 * so that the client back channel can find the
1164 * nfs_client struct
1165 */
1166 clp->cl_cons_state = NFS_CS_SESSION_INITING;
1162 } 1167 }
1163#endif /* CONFIG_NFS_V4_1 */ 1168#endif /* CONFIG_NFS_V4_1 */
1164 1169
@@ -1454,7 +1459,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1454 data->authflavor, 1459 data->authflavor,
1455 parent_server->client->cl_xprt->prot, 1460 parent_server->client->cl_xprt->prot,
1456 parent_server->client->cl_timeout, 1461 parent_server->client->cl_timeout,
1457 parent_client->cl_minorversion); 1462 parent_client->cl_mvops->minor_version);
1458 if (error < 0) 1463 if (error < 0)
1459 goto error; 1464 goto error;
1460 1465
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 301634543974..b9c3c43cea1d 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -268,14 +268,6 @@ out:
268 return status; 268 return status;
269} 269}
270 270
271/* Sync all data to disk upon delegation return */
272static void nfs_msync_inode(struct inode *inode)
273{
274 filemap_fdatawrite(inode->i_mapping);
275 nfs_wb_all(inode);
276 filemap_fdatawait(inode->i_mapping);
277}
278
279/* 271/*
280 * Basic procedure for returning a delegation to the server 272 * Basic procedure for returning a delegation to the server
281 */ 273 */
@@ -367,7 +359,7 @@ int nfs_inode_return_delegation(struct inode *inode)
367 delegation = nfs_detach_delegation_locked(nfsi, NULL, clp); 359 delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
368 spin_unlock(&clp->cl_lock); 360 spin_unlock(&clp->cl_lock);
369 if (delegation != NULL) { 361 if (delegation != NULL) {
370 nfs_msync_inode(inode); 362 nfs_wb_all(inode);
371 err = __nfs_inode_return_delegation(inode, delegation, 1); 363 err = __nfs_inode_return_delegation(inode, delegation, 1);
372 } 364 }
373 } 365 }
@@ -471,9 +463,7 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp)
471/* 463/*
472 * Asynchronous delegation recall! 464 * Asynchronous delegation recall!
473 */ 465 */
474int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid, 466int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid)
475 int (*validate_stateid)(struct nfs_delegation *delegation,
476 const nfs4_stateid *stateid))
477{ 467{
478 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; 468 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
479 struct nfs_delegation *delegation; 469 struct nfs_delegation *delegation;
@@ -481,7 +471,7 @@ int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *s
481 rcu_read_lock(); 471 rcu_read_lock();
482 delegation = rcu_dereference(NFS_I(inode)->delegation); 472 delegation = rcu_dereference(NFS_I(inode)->delegation);
483 473
484 if (!validate_stateid(delegation, stateid)) { 474 if (!clp->cl_mvops->validate_stateid(delegation, stateid)) {
485 rcu_read_unlock(); 475 rcu_read_unlock();
486 return -ENOENT; 476 return -ENOENT;
487 } 477 }
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 69e7b8140122..2026304bda19 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -34,9 +34,7 @@ enum {
34int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); 34int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
35void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); 35void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
36int nfs_inode_return_delegation(struct inode *inode); 36int nfs_inode_return_delegation(struct inode *inode);
37int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid, 37int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
38 int (*validate_stateid)(struct nfs_delegation *delegation,
39 const nfs4_stateid *stateid));
40void nfs_inode_return_delegation_noreclaim(struct inode *inode); 38void nfs_inode_return_delegation_noreclaim(struct inode *inode);
41 39
42struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); 40struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e60416d3f818..29539ceeb745 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1652,16 +1652,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1652 } 1652 }
1653 } 1653 }
1654 1654
1655 /*
1656 * ... prune child dentries and writebacks if needed.
1657 */
1658 if (atomic_read(&old_dentry->d_count) > 1) {
1659 if (S_ISREG(old_inode->i_mode))
1660 nfs_wb_all(old_inode);
1661 shrink_dcache_parent(old_dentry);
1662 }
1663 nfs_inode_return_delegation(old_inode); 1655 nfs_inode_return_delegation(old_inode);
1664
1665 if (new_inode != NULL) 1656 if (new_inode != NULL)
1666 nfs_inode_return_delegation(new_inode); 1657 nfs_inode_return_delegation(new_inode);
1667 1658
@@ -1953,7 +1944,7 @@ int nfs_permission(struct inode *inode, int mask)
1953 if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) 1944 if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
1954 goto out; 1945 goto out;
1955 /* Is this sys_access() ? */ 1946 /* Is this sys_access() ? */
1956 if (mask & MAY_ACCESS) 1947 if (mask & (MAY_ACCESS | MAY_CHDIR))
1957 goto force_lookup; 1948 goto force_lookup;
1958 1949
1959 switch (inode->i_mode & S_IFMT) { 1950 switch (inode->i_mode & S_IFMT) {
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index ad4cd31d6050..064a80961677 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -69,6 +69,7 @@ struct nfs_direct_req {
69 69
70 /* I/O parameters */ 70 /* I/O parameters */
71 struct nfs_open_context *ctx; /* file open context info */ 71 struct nfs_open_context *ctx; /* file open context info */
72 struct nfs_lock_context *l_ctx; /* Lock context info */
72 struct kiocb * iocb; /* controlling i/o request */ 73 struct kiocb * iocb; /* controlling i/o request */
73 struct inode * inode; /* target file of i/o */ 74 struct inode * inode; /* target file of i/o */
74 75
@@ -160,6 +161,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
160 INIT_LIST_HEAD(&dreq->rewrite_list); 161 INIT_LIST_HEAD(&dreq->rewrite_list);
161 dreq->iocb = NULL; 162 dreq->iocb = NULL;
162 dreq->ctx = NULL; 163 dreq->ctx = NULL;
164 dreq->l_ctx = NULL;
163 spin_lock_init(&dreq->lock); 165 spin_lock_init(&dreq->lock);
164 atomic_set(&dreq->io_count, 0); 166 atomic_set(&dreq->io_count, 0);
165 dreq->count = 0; 167 dreq->count = 0;
@@ -173,6 +175,8 @@ static void nfs_direct_req_free(struct kref *kref)
173{ 175{
174 struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); 176 struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
175 177
178 if (dreq->l_ctx != NULL)
179 nfs_put_lock_context(dreq->l_ctx);
176 if (dreq->ctx != NULL) 180 if (dreq->ctx != NULL)
177 put_nfs_open_context(dreq->ctx); 181 put_nfs_open_context(dreq->ctx);
178 kmem_cache_free(nfs_direct_cachep, dreq); 182 kmem_cache_free(nfs_direct_cachep, dreq);
@@ -336,6 +340,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
336 data->cred = msg.rpc_cred; 340 data->cred = msg.rpc_cred;
337 data->args.fh = NFS_FH(inode); 341 data->args.fh = NFS_FH(inode);
338 data->args.context = ctx; 342 data->args.context = ctx;
343 data->args.lock_context = dreq->l_ctx;
339 data->args.offset = pos; 344 data->args.offset = pos;
340 data->args.pgbase = pgbase; 345 data->args.pgbase = pgbase;
341 data->args.pages = data->pagevec; 346 data->args.pages = data->pagevec;
@@ -416,24 +421,28 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
416static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, 421static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
417 unsigned long nr_segs, loff_t pos) 422 unsigned long nr_segs, loff_t pos)
418{ 423{
419 ssize_t result = 0; 424 ssize_t result = -ENOMEM;
420 struct inode *inode = iocb->ki_filp->f_mapping->host; 425 struct inode *inode = iocb->ki_filp->f_mapping->host;
421 struct nfs_direct_req *dreq; 426 struct nfs_direct_req *dreq;
422 427
423 dreq = nfs_direct_req_alloc(); 428 dreq = nfs_direct_req_alloc();
424 if (!dreq) 429 if (dreq == NULL)
425 return -ENOMEM; 430 goto out;
426 431
427 dreq->inode = inode; 432 dreq->inode = inode;
428 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); 433 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
434 dreq->l_ctx = nfs_get_lock_context(dreq->ctx);
435 if (dreq->l_ctx == NULL)
436 goto out_release;
429 if (!is_sync_kiocb(iocb)) 437 if (!is_sync_kiocb(iocb))
430 dreq->iocb = iocb; 438 dreq->iocb = iocb;
431 439
432 result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos); 440 result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos);
433 if (!result) 441 if (!result)
434 result = nfs_direct_wait(dreq); 442 result = nfs_direct_wait(dreq);
443out_release:
435 nfs_direct_req_release(dreq); 444 nfs_direct_req_release(dreq);
436 445out:
437 return result; 446 return result;
438} 447}
439 448
@@ -574,6 +583,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
574 data->args.offset = 0; 583 data->args.offset = 0;
575 data->args.count = 0; 584 data->args.count = 0;
576 data->args.context = dreq->ctx; 585 data->args.context = dreq->ctx;
586 data->args.lock_context = dreq->l_ctx;
577 data->res.count = 0; 587 data->res.count = 0;
578 data->res.fattr = &data->fattr; 588 data->res.fattr = &data->fattr;
579 data->res.verf = &data->verf; 589 data->res.verf = &data->verf;
@@ -761,6 +771,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
761 data->cred = msg.rpc_cred; 771 data->cred = msg.rpc_cred;
762 data->args.fh = NFS_FH(inode); 772 data->args.fh = NFS_FH(inode);
763 data->args.context = ctx; 773 data->args.context = ctx;
774 data->args.lock_context = dreq->l_ctx;
764 data->args.offset = pos; 775 data->args.offset = pos;
765 data->args.pgbase = pgbase; 776 data->args.pgbase = pgbase;
766 data->args.pages = data->pagevec; 777 data->args.pages = data->pagevec;
@@ -845,7 +856,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
845 unsigned long nr_segs, loff_t pos, 856 unsigned long nr_segs, loff_t pos,
846 size_t count) 857 size_t count)
847{ 858{
848 ssize_t result = 0; 859 ssize_t result = -ENOMEM;
849 struct inode *inode = iocb->ki_filp->f_mapping->host; 860 struct inode *inode = iocb->ki_filp->f_mapping->host;
850 struct nfs_direct_req *dreq; 861 struct nfs_direct_req *dreq;
851 size_t wsize = NFS_SERVER(inode)->wsize; 862 size_t wsize = NFS_SERVER(inode)->wsize;
@@ -853,7 +864,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
853 864
854 dreq = nfs_direct_req_alloc(); 865 dreq = nfs_direct_req_alloc();
855 if (!dreq) 866 if (!dreq)
856 return -ENOMEM; 867 goto out;
857 nfs_alloc_commit_data(dreq); 868 nfs_alloc_commit_data(dreq);
858 869
859 if (dreq->commit_data == NULL || count < wsize) 870 if (dreq->commit_data == NULL || count < wsize)
@@ -861,14 +872,18 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
861 872
862 dreq->inode = inode; 873 dreq->inode = inode;
863 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); 874 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
875 dreq->l_ctx = nfs_get_lock_context(dreq->ctx);
876 if (dreq->l_ctx == NULL)
877 goto out_release;
864 if (!is_sync_kiocb(iocb)) 878 if (!is_sync_kiocb(iocb))
865 dreq->iocb = iocb; 879 dreq->iocb = iocb;
866 880
867 result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, sync); 881 result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, sync);
868 if (!result) 882 if (!result)
869 result = nfs_direct_wait(dreq); 883 result = nfs_direct_wait(dreq);
884out_release:
870 nfs_direct_req_release(dreq); 885 nfs_direct_req_release(dreq);
871 886out:
872 return result; 887 return result;
873} 888}
874 889
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 36a5e74f51b4..2d141a74ae82 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -27,6 +27,7 @@
27#include <linux/pagemap.h> 27#include <linux/pagemap.h>
28#include <linux/aio.h> 28#include <linux/aio.h>
29#include <linux/gfp.h> 29#include <linux/gfp.h>
30#include <linux/swap.h>
30 31
31#include <asm/uaccess.h> 32#include <asm/uaccess.h>
32#include <asm/system.h> 33#include <asm/system.h>
@@ -202,37 +203,11 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
202} 203}
203 204
204/* 205/*
205 * Helper for nfs_file_flush() and nfs_file_fsync()
206 *
207 * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to
208 * disk, but it retrieves and clears ctx->error after synching, despite
209 * the two being set at the same time in nfs_context_set_write_error().
210 * This is because the former is used to notify the _next_ call to
211 * nfs_file_write() that a write error occured, and hence cause it to
212 * fall back to doing a synchronous write.
213 */
214static int nfs_do_fsync(struct nfs_open_context *ctx, struct inode *inode)
215{
216 int have_error, status;
217 int ret = 0;
218
219 have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
220 status = nfs_wb_all(inode);
221 have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
222 if (have_error)
223 ret = xchg(&ctx->error, 0);
224 if (!ret)
225 ret = status;
226 return ret;
227}
228
229/*
230 * Flush all dirty pages, and check for write errors. 206 * Flush all dirty pages, and check for write errors.
231 */ 207 */
232static int 208static int
233nfs_file_flush(struct file *file, fl_owner_t id) 209nfs_file_flush(struct file *file, fl_owner_t id)
234{ 210{
235 struct nfs_open_context *ctx = nfs_file_open_context(file);
236 struct dentry *dentry = file->f_path.dentry; 211 struct dentry *dentry = file->f_path.dentry;
237 struct inode *inode = dentry->d_inode; 212 struct inode *inode = dentry->d_inode;
238 213
@@ -245,7 +220,7 @@ nfs_file_flush(struct file *file, fl_owner_t id)
245 return 0; 220 return 0;
246 221
247 /* Flush writes to the server and return any errors */ 222 /* Flush writes to the server and return any errors */
248 return nfs_do_fsync(ctx, inode); 223 return vfs_fsync(file, 0);
249} 224}
250 225
251static ssize_t 226static ssize_t
@@ -320,6 +295,13 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
320 * Flush any dirty pages for this process, and check for write errors. 295 * Flush any dirty pages for this process, and check for write errors.
321 * The return status from this call provides a reliable indication of 296 * The return status from this call provides a reliable indication of
322 * whether any write errors occurred for this process. 297 * whether any write errors occurred for this process.
298 *
299 * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to
300 * disk, but it retrieves and clears ctx->error after synching, despite
301 * the two being set at the same time in nfs_context_set_write_error().
302 * This is because the former is used to notify the _next_ call to
303 * nfs_file_write() that a write error occurred, and hence cause it to
304 * fall back to doing a synchronous write.
323 */ 305 */
324static int 306static int
325nfs_file_fsync(struct file *file, int datasync) 307nfs_file_fsync(struct file *file, int datasync)
@@ -327,13 +309,23 @@ nfs_file_fsync(struct file *file, int datasync)
327 struct dentry *dentry = file->f_path.dentry; 309 struct dentry *dentry = file->f_path.dentry;
328 struct nfs_open_context *ctx = nfs_file_open_context(file); 310 struct nfs_open_context *ctx = nfs_file_open_context(file);
329 struct inode *inode = dentry->d_inode; 311 struct inode *inode = dentry->d_inode;
312 int have_error, status;
313 int ret = 0;
314
330 315
331 dprintk("NFS: fsync file(%s/%s) datasync %d\n", 316 dprintk("NFS: fsync file(%s/%s) datasync %d\n",
332 dentry->d_parent->d_name.name, dentry->d_name.name, 317 dentry->d_parent->d_name.name, dentry->d_name.name,
333 datasync); 318 datasync);
334 319
335 nfs_inc_stats(inode, NFSIOS_VFSFSYNC); 320 nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
336 return nfs_do_fsync(ctx, inode); 321 have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
322 status = nfs_commit_inode(inode, FLUSH_SYNC);
323 have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
324 if (have_error)
325 ret = xchg(&ctx->error, 0);
326 if (!ret)
327 ret = status;
328 return ret;
337} 329}
338 330
339/* 331/*
@@ -493,11 +485,19 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset)
493 */ 485 */
494static int nfs_release_page(struct page *page, gfp_t gfp) 486static int nfs_release_page(struct page *page, gfp_t gfp)
495{ 487{
488 struct address_space *mapping = page->mapping;
489
496 dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); 490 dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
497 491
498 /* Only do I/O if gfp is a superset of GFP_KERNEL */ 492 /* Only do I/O if gfp is a superset of GFP_KERNEL */
499 if ((gfp & GFP_KERNEL) == GFP_KERNEL) 493 if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL) {
500 nfs_wb_page(page->mapping->host, page); 494 int how = FLUSH_SYNC;
495
496 /* Don't let kswapd deadlock waiting for OOM RPC calls */
497 if (current_is_kswapd())
498 how = 0;
499 nfs_commit_inode(mapping->host, how);
500 }
501 /* If PagePrivate() is set, then the page is not freeable */ 501 /* If PagePrivate() is set, then the page is not freeable */
502 if (PagePrivate(page)) 502 if (PagePrivate(page))
503 return 0; 503 return 0;
@@ -639,7 +639,7 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
639 639
640 /* Return error values for O_DSYNC and IS_SYNC() */ 640 /* Return error values for O_DSYNC and IS_SYNC() */
641 if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) { 641 if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) {
642 int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode); 642 int err = vfs_fsync(iocb->ki_filp, 0);
643 if (err < 0) 643 if (err < 0)
644 result = err; 644 result = err;
645 } 645 }
@@ -675,7 +675,7 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
675 written = ret; 675 written = ret;
676 676
677 if (ret >= 0 && nfs_need_sync_write(filp, inode)) { 677 if (ret >= 0 && nfs_need_sync_write(filp, inode)) {
678 int err = nfs_do_fsync(nfs_file_open_context(filp), inode); 678 int err = vfs_fsync(filp, 0);
679 if (err < 0) 679 if (err < 0)
680 ret = err; 680 ret = err;
681 } 681 }
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 099b3518feea..7d2d6c72aa78 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -98,7 +98,7 @@ u64 nfs_compat_user_ino64(u64 fileid)
98 return ino; 98 return ino;
99} 99}
100 100
101void nfs_clear_inode(struct inode *inode) 101static void nfs_clear_inode(struct inode *inode)
102{ 102{
103 /* 103 /*
104 * The following should never happen... 104 * The following should never happen...
@@ -110,6 +110,13 @@ void nfs_clear_inode(struct inode *inode)
110 nfs_fscache_release_inode_cookie(inode); 110 nfs_fscache_release_inode_cookie(inode);
111} 111}
112 112
113void nfs_evict_inode(struct inode *inode)
114{
115 truncate_inode_pages(&inode->i_data, 0);
116 end_writeback(inode);
117 nfs_clear_inode(inode);
118}
119
113/** 120/**
114 * nfs_sync_mapping - helper to flush all mmapped dirty data to disk 121 * nfs_sync_mapping - helper to flush all mmapped dirty data to disk
115 */ 122 */
@@ -413,10 +420,8 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
413 return 0; 420 return 0;
414 421
415 /* Write all dirty data */ 422 /* Write all dirty data */
416 if (S_ISREG(inode->i_mode)) { 423 if (S_ISREG(inode->i_mode))
417 filemap_write_and_wait(inode->i_mapping);
418 nfs_wb_all(inode); 424 nfs_wb_all(inode);
419 }
420 425
421 fattr = nfs_alloc_fattr(); 426 fattr = nfs_alloc_fattr();
422 if (fattr == NULL) 427 if (fattr == NULL)
@@ -530,6 +535,68 @@ out:
530 return err; 535 return err;
531} 536}
532 537
538static void nfs_init_lock_context(struct nfs_lock_context *l_ctx)
539{
540 atomic_set(&l_ctx->count, 1);
541 l_ctx->lockowner = current->files;
542 l_ctx->pid = current->tgid;
543 INIT_LIST_HEAD(&l_ctx->list);
544}
545
546static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx)
547{
548 struct nfs_lock_context *pos;
549
550 list_for_each_entry(pos, &ctx->lock_context.list, list) {
551 if (pos->lockowner != current->files)
552 continue;
553 if (pos->pid != current->tgid)
554 continue;
555 atomic_inc(&pos->count);
556 return pos;
557 }
558 return NULL;
559}
560
561struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
562{
563 struct nfs_lock_context *res, *new = NULL;
564 struct inode *inode = ctx->path.dentry->d_inode;
565
566 spin_lock(&inode->i_lock);
567 res = __nfs_find_lock_context(ctx);
568 if (res == NULL) {
569 spin_unlock(&inode->i_lock);
570 new = kmalloc(sizeof(*new), GFP_KERNEL);
571 if (new == NULL)
572 return NULL;
573 nfs_init_lock_context(new);
574 spin_lock(&inode->i_lock);
575 res = __nfs_find_lock_context(ctx);
576 if (res == NULL) {
577 list_add_tail(&new->list, &ctx->lock_context.list);
578 new->open_context = ctx;
579 res = new;
580 new = NULL;
581 }
582 }
583 spin_unlock(&inode->i_lock);
584 kfree(new);
585 return res;
586}
587
588void nfs_put_lock_context(struct nfs_lock_context *l_ctx)
589{
590 struct nfs_open_context *ctx = l_ctx->open_context;
591 struct inode *inode = ctx->path.dentry->d_inode;
592
593 if (!atomic_dec_and_lock(&l_ctx->count, &inode->i_lock))
594 return;
595 list_del(&l_ctx->list);
596 spin_unlock(&inode->i_lock);
597 kfree(l_ctx);
598}
599
533/** 600/**
534 * nfs_close_context - Common close_context() routine NFSv2/v3 601 * nfs_close_context - Common close_context() routine NFSv2/v3
535 * @ctx: pointer to context 602 * @ctx: pointer to context
@@ -566,11 +633,11 @@ static struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct
566 path_get(&ctx->path); 633 path_get(&ctx->path);
567 ctx->cred = get_rpccred(cred); 634 ctx->cred = get_rpccred(cred);
568 ctx->state = NULL; 635 ctx->state = NULL;
569 ctx->lockowner = current->files;
570 ctx->flags = 0; 636 ctx->flags = 0;
571 ctx->error = 0; 637 ctx->error = 0;
572 ctx->dir_cookie = 0; 638 ctx->dir_cookie = 0;
573 atomic_set(&ctx->count, 1); 639 nfs_init_lock_context(&ctx->lock_context);
640 ctx->lock_context.open_context = ctx;
574 } 641 }
575 return ctx; 642 return ctx;
576} 643}
@@ -578,7 +645,7 @@ static struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct
578struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) 645struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
579{ 646{
580 if (ctx != NULL) 647 if (ctx != NULL)
581 atomic_inc(&ctx->count); 648 atomic_inc(&ctx->lock_context.count);
582 return ctx; 649 return ctx;
583} 650}
584 651
@@ -586,7 +653,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
586{ 653{
587 struct inode *inode = ctx->path.dentry->d_inode; 654 struct inode *inode = ctx->path.dentry->d_inode;
588 655
589 if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock)) 656 if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock))
590 return; 657 return;
591 list_del(&ctx->list); 658 list_del(&ctx->list);
592 spin_unlock(&inode->i_lock); 659 spin_unlock(&inode->i_lock);
@@ -1338,8 +1405,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1338 * to open() calls that passed nfs_atomic_lookup, but failed to call 1405 * to open() calls that passed nfs_atomic_lookup, but failed to call
1339 * nfs_open(). 1406 * nfs_open().
1340 */ 1407 */
1341void nfs4_clear_inode(struct inode *inode) 1408void nfs4_evict_inode(struct inode *inode)
1342{ 1409{
1410 truncate_inode_pages(&inode->i_data, 0);
1411 end_writeback(inode);
1343 /* If we are holding a delegation, return it! */ 1412 /* If we are holding a delegation, return it! */
1344 nfs_inode_return_delegation_noreclaim(inode); 1413 nfs_inode_return_delegation_noreclaim(inode);
1345 /* First call standard NFS clear_inode() code */ 1414 /* First call standard NFS clear_inode() code */
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e70f44b9b3f4..c961bc92c107 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -213,9 +213,9 @@ extern struct workqueue_struct *nfsiod_workqueue;
213extern struct inode *nfs_alloc_inode(struct super_block *sb); 213extern struct inode *nfs_alloc_inode(struct super_block *sb);
214extern void nfs_destroy_inode(struct inode *); 214extern void nfs_destroy_inode(struct inode *);
215extern int nfs_write_inode(struct inode *, struct writeback_control *); 215extern int nfs_write_inode(struct inode *, struct writeback_control *);
216extern void nfs_clear_inode(struct inode *); 216extern void nfs_evict_inode(struct inode *);
217#ifdef CONFIG_NFS_V4 217#ifdef CONFIG_NFS_V4
218extern void nfs4_clear_inode(struct inode *); 218extern void nfs4_evict_inode(struct inode *);
219#endif 219#endif
220void nfs_zap_acl_cache(struct inode *inode); 220void nfs_zap_acl_cache(struct inode *inode);
221extern int nfs_wait_bit_killable(void *word); 221extern int nfs_wait_bit_killable(void *word);
@@ -370,10 +370,9 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len)
370 * Helper for restarting RPC calls in the possible presence of NFSv4.1 370 * Helper for restarting RPC calls in the possible presence of NFSv4.1
371 * sessions. 371 * sessions.
372 */ 372 */
373static inline void nfs_restart_rpc(struct rpc_task *task, const struct nfs_client *clp) 373static inline int nfs_restart_rpc(struct rpc_task *task, const struct nfs_client *clp)
374{ 374{
375 if (nfs4_has_session(clp)) 375 if (nfs4_has_session(clp))
376 rpc_restart_call_prepare(task); 376 return rpc_restart_call_prepare(task);
377 else 377 return rpc_restart_call(task);
378 rpc_restart_call(task);
379} 378}
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 81cf14257916..db8846a0e82e 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -233,7 +233,7 @@ nfs_xdr_removeargs(struct rpc_rqst *req, __be32 *p, const struct nfs_removeargs
233static int 233static int
234nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) 234nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
235{ 235{
236 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; 236 struct rpc_auth *auth = req->rq_cred->cr_auth;
237 unsigned int replen; 237 unsigned int replen;
238 u32 offset = (u32)args->offset; 238 u32 offset = (u32)args->offset;
239 u32 count = args->count; 239 u32 count = args->count;
@@ -393,8 +393,7 @@ nfs_xdr_symlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_symlinkargs *arg
393static int 393static int
394nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args) 394nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args)
395{ 395{
396 struct rpc_task *task = req->rq_task; 396 struct rpc_auth *auth = req->rq_cred->cr_auth;
397 struct rpc_auth *auth = task->tk_msg.rpc_cred->cr_auth;
398 unsigned int replen; 397 unsigned int replen;
399 u32 count = args->count; 398 u32 count = args->count;
400 399
@@ -575,7 +574,7 @@ nfs_xdr_diropres(struct rpc_rqst *req, __be32 *p, struct nfs_diropok *res)
575static int 574static int
576nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args) 575nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args)
577{ 576{
578 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; 577 struct rpc_auth *auth = req->rq_cred->cr_auth;
579 unsigned int replen; 578 unsigned int replen;
580 579
581 p = xdr_encode_fhandle(p, args->fh); 580 p = xdr_encode_fhandle(p, args->fh);
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 75dcfc7da365..9769704f8ce6 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -330,7 +330,7 @@ nfs3_xdr_accessargs(struct rpc_rqst *req, __be32 *p, struct nfs3_accessargs *arg
330static int 330static int
331nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) 331nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
332{ 332{
333 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; 333 struct rpc_auth *auth = req->rq_cred->cr_auth;
334 unsigned int replen; 334 unsigned int replen;
335 u32 count = args->count; 335 u32 count = args->count;
336 336
@@ -471,7 +471,7 @@ nfs3_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_linkargs *args)
471static int 471static int
472nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args) 472nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args)
473{ 473{
474 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; 474 struct rpc_auth *auth = req->rq_cred->cr_auth;
475 unsigned int replen; 475 unsigned int replen;
476 u32 count = args->count; 476 u32 count = args->count;
477 477
@@ -675,7 +675,7 @@ static int
675nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p, 675nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p,
676 struct nfs3_getaclargs *args) 676 struct nfs3_getaclargs *args)
677{ 677{
678 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; 678 struct rpc_auth *auth = req->rq_cred->cr_auth;
679 unsigned int replen; 679 unsigned int replen;
680 680
681 p = xdr_encode_fhandle(p, args->fh); 681 p = xdr_encode_fhandle(p, args->fh);
@@ -802,7 +802,7 @@ nfs3_xdr_accessres(struct rpc_rqst *req, __be32 *p, struct nfs3_accessres *res)
802static int 802static int
803nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args) 803nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args)
804{ 804{
805 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; 805 struct rpc_auth *auth = req->rq_cred->cr_auth;
806 unsigned int replen; 806 unsigned int replen;
807 807
808 p = xdr_encode_fhandle(p, args->fh); 808 p = xdr_encode_fhandle(p, args->fh);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index c538c6106e16..311e15cc8af0 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -45,10 +45,29 @@ enum nfs4_client_state {
45 NFS4CLNT_RECLAIM_NOGRACE, 45 NFS4CLNT_RECLAIM_NOGRACE,
46 NFS4CLNT_DELEGRETURN, 46 NFS4CLNT_DELEGRETURN,
47 NFS4CLNT_SESSION_RESET, 47 NFS4CLNT_SESSION_RESET,
48 NFS4CLNT_SESSION_DRAINING,
49 NFS4CLNT_RECALL_SLOT, 48 NFS4CLNT_RECALL_SLOT,
50}; 49};
51 50
51enum nfs4_session_state {
52 NFS4_SESSION_INITING,
53 NFS4_SESSION_DRAINING,
54};
55
56struct nfs4_minor_version_ops {
57 u32 minor_version;
58
59 int (*call_sync)(struct nfs_server *server,
60 struct rpc_message *msg,
61 struct nfs4_sequence_args *args,
62 struct nfs4_sequence_res *res,
63 int cache_reply);
64 int (*validate_stateid)(struct nfs_delegation *,
65 const nfs4_stateid *);
66 const struct nfs4_state_recovery_ops *reboot_recovery_ops;
67 const struct nfs4_state_recovery_ops *nograce_recovery_ops;
68 const struct nfs4_state_maintenance_ops *state_renewal_ops;
69};
70
52/* 71/*
53 * struct rpc_sequence ensures that RPC calls are sent in the exact 72 * struct rpc_sequence ensures that RPC calls are sent in the exact
54 * order that they appear on the list. 73 * order that they appear on the list.
@@ -89,7 +108,6 @@ struct nfs_unique_id {
89 */ 108 */
90struct nfs4_state_owner { 109struct nfs4_state_owner {
91 struct nfs_unique_id so_owner_id; 110 struct nfs_unique_id so_owner_id;
92 struct nfs_client *so_client;
93 struct nfs_server *so_server; 111 struct nfs_server *so_server;
94 struct rb_node so_client_node; 112 struct rb_node so_client_node;
95 113
@@ -99,7 +117,6 @@ struct nfs4_state_owner {
99 atomic_t so_count; 117 atomic_t so_count;
100 unsigned long so_flags; 118 unsigned long so_flags;
101 struct list_head so_states; 119 struct list_head so_states;
102 struct list_head so_delegations;
103 struct nfs_seqid_counter so_seqid; 120 struct nfs_seqid_counter so_seqid;
104 struct rpc_sequence so_sequence; 121 struct rpc_sequence so_sequence;
105}; 122};
@@ -125,10 +142,20 @@ enum {
125 * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN) 142 * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN)
126 */ 143 */
127 144
145struct nfs4_lock_owner {
146 unsigned int lo_type;
147#define NFS4_ANY_LOCK_TYPE (0U)
148#define NFS4_FLOCK_LOCK_TYPE (1U << 0)
149#define NFS4_POSIX_LOCK_TYPE (1U << 1)
150 union {
151 fl_owner_t posix_owner;
152 pid_t flock_owner;
153 } lo_u;
154};
155
128struct nfs4_lock_state { 156struct nfs4_lock_state {
129 struct list_head ls_locks; /* Other lock stateids */ 157 struct list_head ls_locks; /* Other lock stateids */
130 struct nfs4_state * ls_state; /* Pointer to open state */ 158 struct nfs4_state * ls_state; /* Pointer to open state */
131 fl_owner_t ls_owner; /* POSIX lock owner */
132#define NFS_LOCK_INITIALIZED 1 159#define NFS_LOCK_INITIALIZED 1
133 int ls_flags; 160 int ls_flags;
134 struct nfs_seqid_counter ls_seqid; 161 struct nfs_seqid_counter ls_seqid;
@@ -136,6 +163,7 @@ struct nfs4_lock_state {
136 struct nfs_unique_id ls_id; 163 struct nfs_unique_id ls_id;
137 nfs4_stateid ls_stateid; 164 nfs4_stateid ls_stateid;
138 atomic_t ls_count; 165 atomic_t ls_count;
166 struct nfs4_lock_owner ls_owner;
139}; 167};
140 168
141/* bits for nfs4_state->flags */ 169/* bits for nfs4_state->flags */
@@ -219,11 +247,15 @@ extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nam
219extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); 247extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
220extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, 248extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
221 struct nfs4_fs_locations *fs_locations, struct page *page); 249 struct nfs4_fs_locations *fs_locations, struct page *page);
250extern void nfs4_release_lockowner(const struct nfs4_lock_state *);
222 251
223extern struct nfs4_state_recovery_ops *nfs4_reboot_recovery_ops[];
224extern struct nfs4_state_recovery_ops *nfs4_nograce_recovery_ops[];
225#if defined(CONFIG_NFS_V4_1) 252#if defined(CONFIG_NFS_V4_1)
226extern int nfs4_setup_sequence(struct nfs_client *clp, 253static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
254{
255 return server->nfs_client->cl_session;
256}
257
258extern int nfs4_setup_sequence(const struct nfs_server *server,
227 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, 259 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
228 int cache_reply, struct rpc_task *task); 260 int cache_reply, struct rpc_task *task);
229extern void nfs4_destroy_session(struct nfs4_session *session); 261extern void nfs4_destroy_session(struct nfs4_session *session);
@@ -234,7 +266,12 @@ extern int nfs4_init_session(struct nfs_server *server);
234extern int nfs4_proc_get_lease_time(struct nfs_client *clp, 266extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
235 struct nfs_fsinfo *fsinfo); 267 struct nfs_fsinfo *fsinfo);
236#else /* CONFIG_NFS_v4_1 */ 268#else /* CONFIG_NFS_v4_1 */
237static inline int nfs4_setup_sequence(struct nfs_client *clp, 269static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
270{
271 return NULL;
272}
273
274static inline int nfs4_setup_sequence(const struct nfs_server *server,
238 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, 275 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
239 int cache_reply, struct rpc_task *task) 276 int cache_reply, struct rpc_task *task)
240{ 277{
@@ -247,7 +284,7 @@ static inline int nfs4_init_session(struct nfs_server *server)
247} 284}
248#endif /* CONFIG_NFS_V4_1 */ 285#endif /* CONFIG_NFS_V4_1 */
249 286
250extern struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[]; 287extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[];
251 288
252extern const u32 nfs4_fattr_bitmap[2]; 289extern const u32 nfs4_fattr_bitmap[2];
253extern const u32 nfs4_statfs_bitmap[2]; 290extern const u32 nfs4_statfs_bitmap[2];
@@ -284,7 +321,7 @@ extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
284extern void nfs41_handle_recall_slot(struct nfs_client *clp); 321extern void nfs41_handle_recall_slot(struct nfs_client *clp);
285extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); 322extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
286extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); 323extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
287extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); 324extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t);
288 325
289extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask); 326extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask);
290extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); 327extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 70015dd60a98..7ffbb98ddec3 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -303,15 +303,19 @@ do_state_recovery:
303} 303}
304 304
305 305
306static void renew_lease(const struct nfs_server *server, unsigned long timestamp) 306static void do_renew_lease(struct nfs_client *clp, unsigned long timestamp)
307{ 307{
308 struct nfs_client *clp = server->nfs_client;
309 spin_lock(&clp->cl_lock); 308 spin_lock(&clp->cl_lock);
310 if (time_before(clp->cl_last_renewal,timestamp)) 309 if (time_before(clp->cl_last_renewal,timestamp))
311 clp->cl_last_renewal = timestamp; 310 clp->cl_last_renewal = timestamp;
312 spin_unlock(&clp->cl_lock); 311 spin_unlock(&clp->cl_lock);
313} 312}
314 313
314static void renew_lease(const struct nfs_server *server, unsigned long timestamp)
315{
316 do_renew_lease(server->nfs_client, timestamp);
317}
318
315#if defined(CONFIG_NFS_V4_1) 319#if defined(CONFIG_NFS_V4_1)
316 320
317/* 321/*
@@ -356,7 +360,7 @@ static void nfs41_check_drain_session_complete(struct nfs4_session *ses)
356{ 360{
357 struct rpc_task *task; 361 struct rpc_task *task;
358 362
359 if (!test_bit(NFS4CLNT_SESSION_DRAINING, &ses->clp->cl_state)) { 363 if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
360 task = rpc_wake_up_next(&ses->fc_slot_table.slot_tbl_waitq); 364 task = rpc_wake_up_next(&ses->fc_slot_table.slot_tbl_waitq);
361 if (task) 365 if (task)
362 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); 366 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
@@ -370,12 +374,11 @@ static void nfs41_check_drain_session_complete(struct nfs4_session *ses)
370 complete(&ses->complete); 374 complete(&ses->complete);
371} 375}
372 376
373static void nfs41_sequence_free_slot(const struct nfs_client *clp, 377static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
374 struct nfs4_sequence_res *res)
375{ 378{
376 struct nfs4_slot_table *tbl; 379 struct nfs4_slot_table *tbl;
377 380
378 tbl = &clp->cl_session->fc_slot_table; 381 tbl = &res->sr_session->fc_slot_table;
379 if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) { 382 if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) {
380 /* just wake up the next guy waiting since 383 /* just wake up the next guy waiting since
381 * we may have not consumed a slot after all */ 384 * we may have not consumed a slot after all */
@@ -385,18 +388,17 @@ static void nfs41_sequence_free_slot(const struct nfs_client *clp,
385 388
386 spin_lock(&tbl->slot_tbl_lock); 389 spin_lock(&tbl->slot_tbl_lock);
387 nfs4_free_slot(tbl, res->sr_slotid); 390 nfs4_free_slot(tbl, res->sr_slotid);
388 nfs41_check_drain_session_complete(clp->cl_session); 391 nfs41_check_drain_session_complete(res->sr_session);
389 spin_unlock(&tbl->slot_tbl_lock); 392 spin_unlock(&tbl->slot_tbl_lock);
390 res->sr_slotid = NFS4_MAX_SLOT_TABLE; 393 res->sr_slotid = NFS4_MAX_SLOT_TABLE;
391} 394}
392 395
393static void nfs41_sequence_done(struct nfs_client *clp, 396static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
394 struct nfs4_sequence_res *res,
395 int rpc_status)
396{ 397{
397 unsigned long timestamp; 398 unsigned long timestamp;
398 struct nfs4_slot_table *tbl; 399 struct nfs4_slot_table *tbl;
399 struct nfs4_slot *slot; 400 struct nfs4_slot *slot;
401 struct nfs_client *clp;
400 402
401 /* 403 /*
402 * sr_status remains 1 if an RPC level error occurred. The server 404 * sr_status remains 1 if an RPC level error occurred. The server
@@ -411,25 +413,51 @@ static void nfs41_sequence_done(struct nfs_client *clp,
411 if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) 413 if (res->sr_slotid == NFS4_MAX_SLOT_TABLE)
412 goto out; 414 goto out;
413 415
416 tbl = &res->sr_session->fc_slot_table;
417 slot = tbl->slots + res->sr_slotid;
418
414 /* Check the SEQUENCE operation status */ 419 /* Check the SEQUENCE operation status */
415 if (res->sr_status == 0) { 420 switch (res->sr_status) {
416 tbl = &clp->cl_session->fc_slot_table; 421 case 0:
417 slot = tbl->slots + res->sr_slotid;
418 /* Update the slot's sequence and clientid lease timer */ 422 /* Update the slot's sequence and clientid lease timer */
419 ++slot->seq_nr; 423 ++slot->seq_nr;
420 timestamp = res->sr_renewal_time; 424 timestamp = res->sr_renewal_time;
421 spin_lock(&clp->cl_lock); 425 clp = res->sr_session->clp;
422 if (time_before(clp->cl_last_renewal, timestamp)) 426 do_renew_lease(clp, timestamp);
423 clp->cl_last_renewal = timestamp;
424 spin_unlock(&clp->cl_lock);
425 /* Check sequence flags */ 427 /* Check sequence flags */
426 if (atomic_read(&clp->cl_count) > 1) 428 if (atomic_read(&clp->cl_count) > 1)
427 nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags); 429 nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
430 break;
431 case -NFS4ERR_DELAY:
432 /* The server detected a resend of the RPC call and
433 * returned NFS4ERR_DELAY as per Section 2.10.6.2
434 * of RFC5661.
435 */
436 dprintk("%s: slot=%d seq=%d: Operation in progress\n",
437 __func__, res->sr_slotid, slot->seq_nr);
438 goto out_retry;
439 default:
440 /* Just update the slot sequence no. */
441 ++slot->seq_nr;
428 } 442 }
429out: 443out:
430 /* The session may be reset by one of the error handlers. */ 444 /* The session may be reset by one of the error handlers. */
431 dprintk("%s: Error %d free the slot \n", __func__, res->sr_status); 445 dprintk("%s: Error %d free the slot \n", __func__, res->sr_status);
432 nfs41_sequence_free_slot(clp, res); 446 nfs41_sequence_free_slot(res);
447 return 1;
448out_retry:
449 if (!rpc_restart_call(task))
450 goto out;
451 rpc_delay(task, NFS4_POLL_RETRY_MAX);
452 return 0;
453}
454
455static int nfs4_sequence_done(struct rpc_task *task,
456 struct nfs4_sequence_res *res)
457{
458 if (res->sr_session == NULL)
459 return 1;
460 return nfs41_sequence_done(task, res);
433} 461}
434 462
435/* 463/*
@@ -480,12 +508,11 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
480 if (res->sr_slotid != NFS4_MAX_SLOT_TABLE) 508 if (res->sr_slotid != NFS4_MAX_SLOT_TABLE)
481 return 0; 509 return 0;
482 510
483 memset(res, 0, sizeof(*res));
484 res->sr_slotid = NFS4_MAX_SLOT_TABLE; 511 res->sr_slotid = NFS4_MAX_SLOT_TABLE;
485 tbl = &session->fc_slot_table; 512 tbl = &session->fc_slot_table;
486 513
487 spin_lock(&tbl->slot_tbl_lock); 514 spin_lock(&tbl->slot_tbl_lock);
488 if (test_bit(NFS4CLNT_SESSION_DRAINING, &session->clp->cl_state) && 515 if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) &&
489 !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) { 516 !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) {
490 /* 517 /*
491 * The state manager will wait until the slot table is empty. 518 * The state manager will wait until the slot table is empty.
@@ -525,6 +552,7 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
525 res->sr_session = session; 552 res->sr_session = session;
526 res->sr_slotid = slotid; 553 res->sr_slotid = slotid;
527 res->sr_renewal_time = jiffies; 554 res->sr_renewal_time = jiffies;
555 res->sr_status_flags = 0;
528 /* 556 /*
529 * sr_status is only set in decode_sequence, and so will remain 557 * sr_status is only set in decode_sequence, and so will remain
530 * set to 1 if an rpc level failure occurs. 558 * set to 1 if an rpc level failure occurs.
@@ -533,33 +561,33 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
533 return 0; 561 return 0;
534} 562}
535 563
536int nfs4_setup_sequence(struct nfs_client *clp, 564int nfs4_setup_sequence(const struct nfs_server *server,
537 struct nfs4_sequence_args *args, 565 struct nfs4_sequence_args *args,
538 struct nfs4_sequence_res *res, 566 struct nfs4_sequence_res *res,
539 int cache_reply, 567 int cache_reply,
540 struct rpc_task *task) 568 struct rpc_task *task)
541{ 569{
570 struct nfs4_session *session = nfs4_get_session(server);
542 int ret = 0; 571 int ret = 0;
543 572
573 if (session == NULL) {
574 args->sa_session = NULL;
575 res->sr_session = NULL;
576 goto out;
577 }
578
544 dprintk("--> %s clp %p session %p sr_slotid %d\n", 579 dprintk("--> %s clp %p session %p sr_slotid %d\n",
545 __func__, clp, clp->cl_session, res->sr_slotid); 580 __func__, session->clp, session, res->sr_slotid);
546 581
547 if (!nfs4_has_session(clp)) 582 ret = nfs41_setup_sequence(session, args, res, cache_reply,
548 goto out;
549 ret = nfs41_setup_sequence(clp->cl_session, args, res, cache_reply,
550 task); 583 task);
551 if (ret && ret != -EAGAIN) {
552 /* terminate rpc task */
553 task->tk_status = ret;
554 task->tk_action = NULL;
555 }
556out: 584out:
557 dprintk("<-- %s status=%d\n", __func__, ret); 585 dprintk("<-- %s status=%d\n", __func__, ret);
558 return ret; 586 return ret;
559} 587}
560 588
561struct nfs41_call_sync_data { 589struct nfs41_call_sync_data {
562 struct nfs_client *clp; 590 const struct nfs_server *seq_server;
563 struct nfs4_sequence_args *seq_args; 591 struct nfs4_sequence_args *seq_args;
564 struct nfs4_sequence_res *seq_res; 592 struct nfs4_sequence_res *seq_res;
565 int cache_reply; 593 int cache_reply;
@@ -569,9 +597,9 @@ static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata)
569{ 597{
570 struct nfs41_call_sync_data *data = calldata; 598 struct nfs41_call_sync_data *data = calldata;
571 599
572 dprintk("--> %s data->clp->cl_session %p\n", __func__, 600 dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server);
573 data->clp->cl_session); 601
574 if (nfs4_setup_sequence(data->clp, data->seq_args, 602 if (nfs4_setup_sequence(data->seq_server, data->seq_args,
575 data->seq_res, data->cache_reply, task)) 603 data->seq_res, data->cache_reply, task))
576 return; 604 return;
577 rpc_call_start(task); 605 rpc_call_start(task);
@@ -587,7 +615,7 @@ static void nfs41_call_sync_done(struct rpc_task *task, void *calldata)
587{ 615{
588 struct nfs41_call_sync_data *data = calldata; 616 struct nfs41_call_sync_data *data = calldata;
589 617
590 nfs41_sequence_done(data->clp, data->seq_res, task->tk_status); 618 nfs41_sequence_done(task, data->seq_res);
591} 619}
592 620
593struct rpc_call_ops nfs41_call_sync_ops = { 621struct rpc_call_ops nfs41_call_sync_ops = {
@@ -600,8 +628,7 @@ struct rpc_call_ops nfs41_call_priv_sync_ops = {
600 .rpc_call_done = nfs41_call_sync_done, 628 .rpc_call_done = nfs41_call_sync_done,
601}; 629};
602 630
603static int nfs4_call_sync_sequence(struct nfs_client *clp, 631static int nfs4_call_sync_sequence(struct nfs_server *server,
604 struct rpc_clnt *clnt,
605 struct rpc_message *msg, 632 struct rpc_message *msg,
606 struct nfs4_sequence_args *args, 633 struct nfs4_sequence_args *args,
607 struct nfs4_sequence_res *res, 634 struct nfs4_sequence_res *res,
@@ -611,13 +638,13 @@ static int nfs4_call_sync_sequence(struct nfs_client *clp,
611 int ret; 638 int ret;
612 struct rpc_task *task; 639 struct rpc_task *task;
613 struct nfs41_call_sync_data data = { 640 struct nfs41_call_sync_data data = {
614 .clp = clp, 641 .seq_server = server,
615 .seq_args = args, 642 .seq_args = args,
616 .seq_res = res, 643 .seq_res = res,
617 .cache_reply = cache_reply, 644 .cache_reply = cache_reply,
618 }; 645 };
619 struct rpc_task_setup task_setup = { 646 struct rpc_task_setup task_setup = {
620 .rpc_client = clnt, 647 .rpc_client = server->client,
621 .rpc_message = msg, 648 .rpc_message = msg,
622 .callback_ops = &nfs41_call_sync_ops, 649 .callback_ops = &nfs41_call_sync_ops,
623 .callback_data = &data 650 .callback_data = &data
@@ -642,10 +669,15 @@ int _nfs4_call_sync_session(struct nfs_server *server,
642 struct nfs4_sequence_res *res, 669 struct nfs4_sequence_res *res,
643 int cache_reply) 670 int cache_reply)
644{ 671{
645 return nfs4_call_sync_sequence(server->nfs_client, server->client, 672 return nfs4_call_sync_sequence(server, msg, args, res, cache_reply, 0);
646 msg, args, res, cache_reply, 0);
647} 673}
648 674
675#else
676static int nfs4_sequence_done(struct rpc_task *task,
677 struct nfs4_sequence_res *res)
678{
679 return 1;
680}
649#endif /* CONFIG_NFS_V4_1 */ 681#endif /* CONFIG_NFS_V4_1 */
650 682
651int _nfs4_call_sync(struct nfs_server *server, 683int _nfs4_call_sync(struct nfs_server *server,
@@ -659,18 +691,9 @@ int _nfs4_call_sync(struct nfs_server *server,
659} 691}
660 692
661#define nfs4_call_sync(server, msg, args, res, cache_reply) \ 693#define nfs4_call_sync(server, msg, args, res, cache_reply) \
662 (server)->nfs_client->cl_call_sync((server), (msg), &(args)->seq_args, \ 694 (server)->nfs_client->cl_mvops->call_sync((server), (msg), &(args)->seq_args, \
663 &(res)->seq_res, (cache_reply)) 695 &(res)->seq_res, (cache_reply))
664 696
665static void nfs4_sequence_done(const struct nfs_server *server,
666 struct nfs4_sequence_res *res, int rpc_status)
667{
668#ifdef CONFIG_NFS_V4_1
669 if (nfs4_has_session(server->nfs_client))
670 nfs41_sequence_done(server->nfs_client, res, rpc_status);
671#endif /* CONFIG_NFS_V4_1 */
672}
673
674static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) 697static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
675{ 698{
676 struct nfs_inode *nfsi = NFS_I(dir); 699 struct nfs_inode *nfsi = NFS_I(dir);
@@ -745,19 +768,14 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
745 p->o_arg.server = server; 768 p->o_arg.server = server;
746 p->o_arg.bitmask = server->attr_bitmask; 769 p->o_arg.bitmask = server->attr_bitmask;
747 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; 770 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
748 if (flags & O_EXCL) { 771 if (flags & O_CREAT) {
749 if (nfs4_has_persistent_session(server->nfs_client)) { 772 u32 *s;
750 /* GUARDED */ 773
751 p->o_arg.u.attrs = &p->attrs;
752 memcpy(&p->attrs, attrs, sizeof(p->attrs));
753 } else { /* EXCLUSIVE4_1 */
754 u32 *s = (u32 *) p->o_arg.u.verifier.data;
755 s[0] = jiffies;
756 s[1] = current->pid;
757 }
758 } else if (flags & O_CREAT) {
759 p->o_arg.u.attrs = &p->attrs; 774 p->o_arg.u.attrs = &p->attrs;
760 memcpy(&p->attrs, attrs, sizeof(p->attrs)); 775 memcpy(&p->attrs, attrs, sizeof(p->attrs));
776 s = (u32 *) p->o_arg.u.verifier.data;
777 s[0] = jiffies;
778 s[1] = current->pid;
761 } 779 }
762 p->c_arg.fh = &p->o_res.fh; 780 p->c_arg.fh = &p->o_res.fh;
763 p->c_arg.stateid = &p->o_res.stateid; 781 p->c_arg.stateid = &p->o_res.stateid;
@@ -1255,8 +1273,6 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
1255 struct nfs4_opendata *data = calldata; 1273 struct nfs4_opendata *data = calldata;
1256 1274
1257 data->rpc_status = task->tk_status; 1275 data->rpc_status = task->tk_status;
1258 if (RPC_ASSASSINATED(task))
1259 return;
1260 if (data->rpc_status == 0) { 1276 if (data->rpc_status == 0) {
1261 memcpy(data->o_res.stateid.data, data->c_res.stateid.data, 1277 memcpy(data->o_res.stateid.data, data->c_res.stateid.data,
1262 sizeof(data->o_res.stateid.data)); 1278 sizeof(data->o_res.stateid.data));
@@ -1356,13 +1372,13 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
1356 } 1372 }
1357 /* Update sequence id. */ 1373 /* Update sequence id. */
1358 data->o_arg.id = sp->so_owner_id.id; 1374 data->o_arg.id = sp->so_owner_id.id;
1359 data->o_arg.clientid = sp->so_client->cl_clientid; 1375 data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid;
1360 if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) { 1376 if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) {
1361 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; 1377 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
1362 nfs_copy_fh(&data->o_res.fh, data->o_arg.fh); 1378 nfs_copy_fh(&data->o_res.fh, data->o_arg.fh);
1363 } 1379 }
1364 data->timestamp = jiffies; 1380 data->timestamp = jiffies;
1365 if (nfs4_setup_sequence(data->o_arg.server->nfs_client, 1381 if (nfs4_setup_sequence(data->o_arg.server,
1366 &data->o_arg.seq_args, 1382 &data->o_arg.seq_args,
1367 &data->o_res.seq_res, 1, task)) 1383 &data->o_res.seq_res, 1, task))
1368 return; 1384 return;
@@ -1385,11 +1401,9 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata)
1385 1401
1386 data->rpc_status = task->tk_status; 1402 data->rpc_status = task->tk_status;
1387 1403
1388 nfs4_sequence_done(data->o_arg.server, &data->o_res.seq_res, 1404 if (!nfs4_sequence_done(task, &data->o_res.seq_res))
1389 task->tk_status);
1390
1391 if (RPC_ASSASSINATED(task))
1392 return; 1405 return;
1406
1393 if (task->tk_status == 0) { 1407 if (task->tk_status == 0) {
1394 switch (data->o_res.f_attr->mode & S_IFMT) { 1408 switch (data->o_res.f_attr->mode & S_IFMT) {
1395 case S_IFREG: 1409 case S_IFREG:
@@ -1773,7 +1787,7 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
1773 if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) { 1787 if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) {
1774 /* Use that stateid */ 1788 /* Use that stateid */
1775 } else if (state != NULL) { 1789 } else if (state != NULL) {
1776 nfs4_copy_stateid(&arg.stateid, state, current->files); 1790 nfs4_copy_stateid(&arg.stateid, state, current->files, current->tgid);
1777 } else 1791 } else
1778 memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); 1792 memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
1779 1793
@@ -1838,8 +1852,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
1838 struct nfs4_state *state = calldata->state; 1852 struct nfs4_state *state = calldata->state;
1839 struct nfs_server *server = NFS_SERVER(calldata->inode); 1853 struct nfs_server *server = NFS_SERVER(calldata->inode);
1840 1854
1841 nfs4_sequence_done(server, &calldata->res.seq_res, task->tk_status); 1855 if (!nfs4_sequence_done(task, &calldata->res.seq_res))
1842 if (RPC_ASSASSINATED(task))
1843 return; 1856 return;
1844 /* hmm. we are done with the inode, and in the process of freeing 1857 /* hmm. we are done with the inode, and in the process of freeing
1845 * the state_owner. we keep this around to process errors 1858 * the state_owner. we keep this around to process errors
@@ -1903,7 +1916,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
1903 1916
1904 nfs_fattr_init(calldata->res.fattr); 1917 nfs_fattr_init(calldata->res.fattr);
1905 calldata->timestamp = jiffies; 1918 calldata->timestamp = jiffies;
1906 if (nfs4_setup_sequence((NFS_SERVER(calldata->inode))->nfs_client, 1919 if (nfs4_setup_sequence(NFS_SERVER(calldata->inode),
1907 &calldata->arg.seq_args, &calldata->res.seq_res, 1920 &calldata->arg.seq_args, &calldata->res.seq_res,
1908 1, task)) 1921 1, task))
1909 return; 1922 return;
@@ -2648,7 +2661,8 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
2648{ 2661{
2649 struct nfs_removeres *res = task->tk_msg.rpc_resp; 2662 struct nfs_removeres *res = task->tk_msg.rpc_resp;
2650 2663
2651 nfs4_sequence_done(res->server, &res->seq_res, task->tk_status); 2664 if (!nfs4_sequence_done(task, &res->seq_res))
2665 return 0;
2652 if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) 2666 if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
2653 return 0; 2667 return 0;
2654 update_changeattr(dir, &res->cinfo); 2668 update_changeattr(dir, &res->cinfo);
@@ -3093,7 +3107,8 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
3093 3107
3094 dprintk("--> %s\n", __func__); 3108 dprintk("--> %s\n", __func__);
3095 3109
3096 nfs4_sequence_done(server, &data->res.seq_res, task->tk_status); 3110 if (!nfs4_sequence_done(task, &data->res.seq_res))
3111 return -EAGAIN;
3097 3112
3098 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { 3113 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
3099 nfs_restart_rpc(task, server->nfs_client); 3114 nfs_restart_rpc(task, server->nfs_client);
@@ -3116,8 +3131,8 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
3116{ 3131{
3117 struct inode *inode = data->inode; 3132 struct inode *inode = data->inode;
3118 3133
3119 nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res, 3134 if (!nfs4_sequence_done(task, &data->res.seq_res))
3120 task->tk_status); 3135 return -EAGAIN;
3121 3136
3122 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { 3137 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
3123 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); 3138 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
@@ -3145,8 +3160,9 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
3145{ 3160{
3146 struct inode *inode = data->inode; 3161 struct inode *inode = data->inode;
3147 3162
3148 nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res, 3163 if (!nfs4_sequence_done(task, &data->res.seq_res))
3149 task->tk_status); 3164 return -EAGAIN;
3165
3150 if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { 3166 if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) {
3151 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); 3167 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
3152 return -EAGAIN; 3168 return -EAGAIN;
@@ -3196,10 +3212,7 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata)
3196 nfs4_schedule_state_recovery(clp); 3212 nfs4_schedule_state_recovery(clp);
3197 return; 3213 return;
3198 } 3214 }
3199 spin_lock(&clp->cl_lock); 3215 do_renew_lease(clp, timestamp);
3200 if (time_before(clp->cl_last_renewal,timestamp))
3201 clp->cl_last_renewal = timestamp;
3202 spin_unlock(&clp->cl_lock);
3203} 3216}
3204 3217
3205static const struct rpc_call_ops nfs4_renew_ops = { 3218static const struct rpc_call_ops nfs4_renew_ops = {
@@ -3240,10 +3253,7 @@ int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred)
3240 status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); 3253 status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
3241 if (status < 0) 3254 if (status < 0)
3242 return status; 3255 return status;
3243 spin_lock(&clp->cl_lock); 3256 do_renew_lease(clp, now);
3244 if (time_before(clp->cl_last_renewal,now))
3245 clp->cl_last_renewal = now;
3246 spin_unlock(&clp->cl_lock);
3247 return 0; 3257 return 0;
3248} 3258}
3249 3259
@@ -3464,9 +3474,11 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
3464} 3474}
3465 3475
3466static int 3476static int
3467_nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs_client *clp, struct nfs4_state *state) 3477nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state)
3468{ 3478{
3469 if (!clp || task->tk_status >= 0) 3479 struct nfs_client *clp = server->nfs_client;
3480
3481 if (task->tk_status >= 0)
3470 return 0; 3482 return 0;
3471 switch(task->tk_status) { 3483 switch(task->tk_status) {
3472 case -NFS4ERR_ADMIN_REVOKED: 3484 case -NFS4ERR_ADMIN_REVOKED:
@@ -3498,8 +3510,7 @@ _nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3498 return -EAGAIN; 3510 return -EAGAIN;
3499#endif /* CONFIG_NFS_V4_1 */ 3511#endif /* CONFIG_NFS_V4_1 */
3500 case -NFS4ERR_DELAY: 3512 case -NFS4ERR_DELAY:
3501 if (server) 3513 nfs_inc_server_stats(server, NFSIOS_DELAY);
3502 nfs_inc_server_stats(server, NFSIOS_DELAY);
3503 case -NFS4ERR_GRACE: 3514 case -NFS4ERR_GRACE:
3504 case -EKEYEXPIRED: 3515 case -EKEYEXPIRED:
3505 rpc_delay(task, NFS4_POLL_RETRY_MAX); 3516 rpc_delay(task, NFS4_POLL_RETRY_MAX);
@@ -3520,12 +3531,6 @@ do_state_recovery:
3520 return -EAGAIN; 3531 return -EAGAIN;
3521} 3532}
3522 3533
3523static int
3524nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state)
3525{
3526 return _nfs4_async_handle_error(task, server, server->nfs_client, state);
3527}
3528
3529int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, 3534int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
3530 unsigned short port, struct rpc_cred *cred, 3535 unsigned short port, struct rpc_cred *cred,
3531 struct nfs4_setclientid_res *res) 3536 struct nfs4_setclientid_res *res)
@@ -3641,8 +3646,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
3641{ 3646{
3642 struct nfs4_delegreturndata *data = calldata; 3647 struct nfs4_delegreturndata *data = calldata;
3643 3648
3644 nfs4_sequence_done(data->res.server, &data->res.seq_res, 3649 if (!nfs4_sequence_done(task, &data->res.seq_res))
3645 task->tk_status); 3650 return;
3646 3651
3647 switch (task->tk_status) { 3652 switch (task->tk_status) {
3648 case -NFS4ERR_STALE_STATEID: 3653 case -NFS4ERR_STALE_STATEID:
@@ -3672,7 +3677,7 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
3672 3677
3673 d_data = (struct nfs4_delegreturndata *)data; 3678 d_data = (struct nfs4_delegreturndata *)data;
3674 3679
3675 if (nfs4_setup_sequence(d_data->res.server->nfs_client, 3680 if (nfs4_setup_sequence(d_data->res.server,
3676 &d_data->args.seq_args, 3681 &d_data->args.seq_args,
3677 &d_data->res.seq_res, 1, task)) 3682 &d_data->res.seq_res, 1, task))
3678 return; 3683 return;
@@ -3892,9 +3897,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
3892{ 3897{
3893 struct nfs4_unlockdata *calldata = data; 3898 struct nfs4_unlockdata *calldata = data;
3894 3899
3895 nfs4_sequence_done(calldata->server, &calldata->res.seq_res, 3900 if (!nfs4_sequence_done(task, &calldata->res.seq_res))
3896 task->tk_status);
3897 if (RPC_ASSASSINATED(task))
3898 return; 3901 return;
3899 switch (task->tk_status) { 3902 switch (task->tk_status) {
3900 case 0: 3903 case 0:
@@ -3927,7 +3930,7 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
3927 return; 3930 return;
3928 } 3931 }
3929 calldata->timestamp = jiffies; 3932 calldata->timestamp = jiffies;
3930 if (nfs4_setup_sequence(calldata->server->nfs_client, 3933 if (nfs4_setup_sequence(calldata->server,
3931 &calldata->arg.seq_args, 3934 &calldata->arg.seq_args,
3932 &calldata->res.seq_res, 1, task)) 3935 &calldata->res.seq_res, 1, task))
3933 return; 3936 return;
@@ -4082,7 +4085,8 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
4082 } else 4085 } else
4083 data->arg.new_lock_owner = 0; 4086 data->arg.new_lock_owner = 0;
4084 data->timestamp = jiffies; 4087 data->timestamp = jiffies;
4085 if (nfs4_setup_sequence(data->server->nfs_client, &data->arg.seq_args, 4088 if (nfs4_setup_sequence(data->server,
4089 &data->arg.seq_args,
4086 &data->res.seq_res, 1, task)) 4090 &data->res.seq_res, 1, task))
4087 return; 4091 return;
4088 rpc_call_start(task); 4092 rpc_call_start(task);
@@ -4101,12 +4105,10 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
4101 4105
4102 dprintk("%s: begin!\n", __func__); 4106 dprintk("%s: begin!\n", __func__);
4103 4107
4104 nfs4_sequence_done(data->server, &data->res.seq_res, 4108 if (!nfs4_sequence_done(task, &data->res.seq_res))
4105 task->tk_status); 4109 return;
4106 4110
4107 data->rpc_status = task->tk_status; 4111 data->rpc_status = task->tk_status;
4108 if (RPC_ASSASSINATED(task))
4109 goto out;
4110 if (data->arg.new_lock_owner != 0) { 4112 if (data->arg.new_lock_owner != 0) {
4111 if (data->rpc_status == 0) 4113 if (data->rpc_status == 0)
4112 nfs_confirm_seqid(&data->lsp->ls_seqid, 0); 4114 nfs_confirm_seqid(&data->lsp->ls_seqid, 0);
@@ -4424,6 +4426,34 @@ out:
4424 return err; 4426 return err;
4425} 4427}
4426 4428
4429static void nfs4_release_lockowner_release(void *calldata)
4430{
4431 kfree(calldata);
4432}
4433
4434const struct rpc_call_ops nfs4_release_lockowner_ops = {
4435 .rpc_release = nfs4_release_lockowner_release,
4436};
4437
4438void nfs4_release_lockowner(const struct nfs4_lock_state *lsp)
4439{
4440 struct nfs_server *server = lsp->ls_state->owner->so_server;
4441 struct nfs_release_lockowner_args *args;
4442 struct rpc_message msg = {
4443 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RELEASE_LOCKOWNER],
4444 };
4445
4446 if (server->nfs_client->cl_mvops->minor_version != 0)
4447 return;
4448 args = kmalloc(sizeof(*args), GFP_NOFS);
4449 if (!args)
4450 return;
4451 args->lock_owner.clientid = server->nfs_client->cl_clientid;
4452 args->lock_owner.id = lsp->ls_id.id;
4453 msg.rpc_argp = args;
4454 rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args);
4455}
4456
4427#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" 4457#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
4428 4458
4429int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf, 4459int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf,
@@ -4611,7 +4641,8 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata)
4611 (struct nfs4_get_lease_time_data *)calldata; 4641 (struct nfs4_get_lease_time_data *)calldata;
4612 4642
4613 dprintk("--> %s\n", __func__); 4643 dprintk("--> %s\n", __func__);
4614 nfs41_sequence_done(data->clp, &data->res->lr_seq_res, task->tk_status); 4644 if (!nfs41_sequence_done(task, &data->res->lr_seq_res))
4645 return;
4615 switch (task->tk_status) { 4646 switch (task->tk_status) {
4616 case -NFS4ERR_DELAY: 4647 case -NFS4ERR_DELAY:
4617 case -NFS4ERR_GRACE: 4648 case -NFS4ERR_GRACE:
@@ -4805,13 +4836,6 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
4805 if (!session) 4836 if (!session)
4806 return NULL; 4837 return NULL;
4807 4838
4808 /*
4809 * The create session reply races with the server back
4810 * channel probe. Mark the client NFS_CS_SESSION_INITING
4811 * so that the client back channel can find the
4812 * nfs_client struct
4813 */
4814 clp->cl_cons_state = NFS_CS_SESSION_INITING;
4815 init_completion(&session->complete); 4839 init_completion(&session->complete);
4816 4840
4817 tbl = &session->fc_slot_table; 4841 tbl = &session->fc_slot_table;
@@ -4824,6 +4848,8 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
4824 spin_lock_init(&tbl->slot_tbl_lock); 4848 spin_lock_init(&tbl->slot_tbl_lock);
4825 rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table"); 4849 rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table");
4826 4850
4851 session->session_state = 1<<NFS4_SESSION_INITING;
4852
4827 session->clp = clp; 4853 session->clp = clp;
4828 return session; 4854 return session;
4829} 4855}
@@ -5040,6 +5066,10 @@ int nfs4_init_session(struct nfs_server *server)
5040 if (!nfs4_has_session(clp)) 5066 if (!nfs4_has_session(clp))
5041 return 0; 5067 return 0;
5042 5068
5069 session = clp->cl_session;
5070 if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state))
5071 return 0;
5072
5043 rsize = server->rsize; 5073 rsize = server->rsize;
5044 if (rsize == 0) 5074 if (rsize == 0)
5045 rsize = NFS_MAX_FILE_IO_SIZE; 5075 rsize = NFS_MAX_FILE_IO_SIZE;
@@ -5047,7 +5077,6 @@ int nfs4_init_session(struct nfs_server *server)
5047 if (wsize == 0) 5077 if (wsize == 0)
5048 wsize = NFS_MAX_FILE_IO_SIZE; 5078 wsize = NFS_MAX_FILE_IO_SIZE;
5049 5079
5050 session = clp->cl_session;
5051 session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead; 5080 session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead;
5052 session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead; 5081 session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead;
5053 5082
@@ -5060,69 +5089,70 @@ int nfs4_init_session(struct nfs_server *server)
5060/* 5089/*
5061 * Renew the cl_session lease. 5090 * Renew the cl_session lease.
5062 */ 5091 */
5063static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred) 5092struct nfs4_sequence_data {
5064{ 5093 struct nfs_client *clp;
5065 struct nfs4_sequence_args args; 5094 struct nfs4_sequence_args args;
5066 struct nfs4_sequence_res res; 5095 struct nfs4_sequence_res res;
5067 5096};
5068 struct rpc_message msg = {
5069 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE],
5070 .rpc_argp = &args,
5071 .rpc_resp = &res,
5072 .rpc_cred = cred,
5073 };
5074
5075 args.sa_cache_this = 0;
5076
5077 return nfs4_call_sync_sequence(clp, clp->cl_rpcclient, &msg, &args,
5078 &res, args.sa_cache_this, 1);
5079}
5080 5097
5081static void nfs41_sequence_release(void *data) 5098static void nfs41_sequence_release(void *data)
5082{ 5099{
5083 struct nfs_client *clp = (struct nfs_client *)data; 5100 struct nfs4_sequence_data *calldata = data;
5101 struct nfs_client *clp = calldata->clp;
5084 5102
5085 if (atomic_read(&clp->cl_count) > 1) 5103 if (atomic_read(&clp->cl_count) > 1)
5086 nfs4_schedule_state_renewal(clp); 5104 nfs4_schedule_state_renewal(clp);
5087 nfs_put_client(clp); 5105 nfs_put_client(clp);
5106 kfree(calldata);
5107}
5108
5109static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client *clp)
5110{
5111 switch(task->tk_status) {
5112 case -NFS4ERR_DELAY:
5113 case -EKEYEXPIRED:
5114 rpc_delay(task, NFS4_POLL_RETRY_MAX);
5115 return -EAGAIN;
5116 default:
5117 nfs4_schedule_state_recovery(clp);
5118 }
5119 return 0;
5088} 5120}
5089 5121
5090static void nfs41_sequence_call_done(struct rpc_task *task, void *data) 5122static void nfs41_sequence_call_done(struct rpc_task *task, void *data)
5091{ 5123{
5092 struct nfs_client *clp = (struct nfs_client *)data; 5124 struct nfs4_sequence_data *calldata = data;
5125 struct nfs_client *clp = calldata->clp;
5093 5126
5094 nfs41_sequence_done(clp, task->tk_msg.rpc_resp, task->tk_status); 5127 if (!nfs41_sequence_done(task, task->tk_msg.rpc_resp))
5128 return;
5095 5129
5096 if (task->tk_status < 0) { 5130 if (task->tk_status < 0) {
5097 dprintk("%s ERROR %d\n", __func__, task->tk_status); 5131 dprintk("%s ERROR %d\n", __func__, task->tk_status);
5098 if (atomic_read(&clp->cl_count) == 1) 5132 if (atomic_read(&clp->cl_count) == 1)
5099 goto out; 5133 goto out;
5100 5134
5101 if (_nfs4_async_handle_error(task, NULL, clp, NULL) 5135 if (nfs41_sequence_handle_errors(task, clp) == -EAGAIN) {
5102 == -EAGAIN) { 5136 rpc_restart_call_prepare(task);
5103 nfs_restart_rpc(task, clp);
5104 return; 5137 return;
5105 } 5138 }
5106 } 5139 }
5107 dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred); 5140 dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred);
5108out: 5141out:
5109 kfree(task->tk_msg.rpc_argp);
5110 kfree(task->tk_msg.rpc_resp);
5111
5112 dprintk("<-- %s\n", __func__); 5142 dprintk("<-- %s\n", __func__);
5113} 5143}
5114 5144
5115static void nfs41_sequence_prepare(struct rpc_task *task, void *data) 5145static void nfs41_sequence_prepare(struct rpc_task *task, void *data)
5116{ 5146{
5117 struct nfs_client *clp; 5147 struct nfs4_sequence_data *calldata = data;
5148 struct nfs_client *clp = calldata->clp;
5118 struct nfs4_sequence_args *args; 5149 struct nfs4_sequence_args *args;
5119 struct nfs4_sequence_res *res; 5150 struct nfs4_sequence_res *res;
5120 5151
5121 clp = (struct nfs_client *)data;
5122 args = task->tk_msg.rpc_argp; 5152 args = task->tk_msg.rpc_argp;
5123 res = task->tk_msg.rpc_resp; 5153 res = task->tk_msg.rpc_resp;
5124 5154
5125 if (nfs4_setup_sequence(clp, args, res, 0, task)) 5155 if (nfs41_setup_sequence(clp->cl_session, args, res, 0, task))
5126 return; 5156 return;
5127 rpc_call_start(task); 5157 rpc_call_start(task);
5128} 5158}
@@ -5133,32 +5163,67 @@ static const struct rpc_call_ops nfs41_sequence_ops = {
5133 .rpc_release = nfs41_sequence_release, 5163 .rpc_release = nfs41_sequence_release,
5134}; 5164};
5135 5165
5136static int nfs41_proc_async_sequence(struct nfs_client *clp, 5166static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
5137 struct rpc_cred *cred)
5138{ 5167{
5139 struct nfs4_sequence_args *args; 5168 struct nfs4_sequence_data *calldata;
5140 struct nfs4_sequence_res *res;
5141 struct rpc_message msg = { 5169 struct rpc_message msg = {
5142 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE], 5170 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE],
5143 .rpc_cred = cred, 5171 .rpc_cred = cred,
5144 }; 5172 };
5173 struct rpc_task_setup task_setup_data = {
5174 .rpc_client = clp->cl_rpcclient,
5175 .rpc_message = &msg,
5176 .callback_ops = &nfs41_sequence_ops,
5177 .flags = RPC_TASK_ASYNC | RPC_TASK_SOFT,
5178 };
5145 5179
5146 if (!atomic_inc_not_zero(&clp->cl_count)) 5180 if (!atomic_inc_not_zero(&clp->cl_count))
5147 return -EIO; 5181 return ERR_PTR(-EIO);
5148 args = kzalloc(sizeof(*args), GFP_NOFS); 5182 calldata = kmalloc(sizeof(*calldata), GFP_NOFS);
5149 res = kzalloc(sizeof(*res), GFP_NOFS); 5183 if (calldata == NULL) {
5150 if (!args || !res) {
5151 kfree(args);
5152 kfree(res);
5153 nfs_put_client(clp); 5184 nfs_put_client(clp);
5154 return -ENOMEM; 5185 return ERR_PTR(-ENOMEM);
5155 } 5186 }
5156 res->sr_slotid = NFS4_MAX_SLOT_TABLE; 5187 calldata->res.sr_slotid = NFS4_MAX_SLOT_TABLE;
5157 msg.rpc_argp = args; 5188 msg.rpc_argp = &calldata->args;
5158 msg.rpc_resp = res; 5189 msg.rpc_resp = &calldata->res;
5190 calldata->clp = clp;
5191 task_setup_data.callback_data = calldata;
5159 5192
5160 return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT, 5193 return rpc_run_task(&task_setup_data);
5161 &nfs41_sequence_ops, (void *)clp); 5194}
5195
5196static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred)
5197{
5198 struct rpc_task *task;
5199 int ret = 0;
5200
5201 task = _nfs41_proc_sequence(clp, cred);
5202 if (IS_ERR(task))
5203 ret = PTR_ERR(task);
5204 else
5205 rpc_put_task(task);
5206 dprintk("<-- %s status=%d\n", __func__, ret);
5207 return ret;
5208}
5209
5210static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
5211{
5212 struct rpc_task *task;
5213 int ret;
5214
5215 task = _nfs41_proc_sequence(clp, cred);
5216 if (IS_ERR(task)) {
5217 ret = PTR_ERR(task);
5218 goto out;
5219 }
5220 ret = rpc_wait_for_completion_task(task);
5221 if (!ret)
5222 ret = task->tk_status;
5223 rpc_put_task(task);
5224out:
5225 dprintk("<-- %s status=%d\n", __func__, ret);
5226 return ret;
5162} 5227}
5163 5228
5164struct nfs4_reclaim_complete_data { 5229struct nfs4_reclaim_complete_data {
@@ -5172,13 +5237,31 @@ static void nfs4_reclaim_complete_prepare(struct rpc_task *task, void *data)
5172 struct nfs4_reclaim_complete_data *calldata = data; 5237 struct nfs4_reclaim_complete_data *calldata = data;
5173 5238
5174 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); 5239 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
5175 if (nfs4_setup_sequence(calldata->clp, &calldata->arg.seq_args, 5240 if (nfs41_setup_sequence(calldata->clp->cl_session,
5241 &calldata->arg.seq_args,
5176 &calldata->res.seq_res, 0, task)) 5242 &calldata->res.seq_res, 0, task))
5177 return; 5243 return;
5178 5244
5179 rpc_call_start(task); 5245 rpc_call_start(task);
5180} 5246}
5181 5247
5248static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nfs_client *clp)
5249{
5250 switch(task->tk_status) {
5251 case 0:
5252 case -NFS4ERR_COMPLETE_ALREADY:
5253 case -NFS4ERR_WRONG_CRED: /* What to do here? */
5254 break;
5255 case -NFS4ERR_DELAY:
5256 case -EKEYEXPIRED:
5257 rpc_delay(task, NFS4_POLL_RETRY_MAX);
5258 return -EAGAIN;
5259 default:
5260 nfs4_schedule_state_recovery(clp);
5261 }
5262 return 0;
5263}
5264
5182static void nfs4_reclaim_complete_done(struct rpc_task *task, void *data) 5265static void nfs4_reclaim_complete_done(struct rpc_task *task, void *data)
5183{ 5266{
5184 struct nfs4_reclaim_complete_data *calldata = data; 5267 struct nfs4_reclaim_complete_data *calldata = data;
@@ -5186,32 +5269,13 @@ static void nfs4_reclaim_complete_done(struct rpc_task *task, void *data)
5186 struct nfs4_sequence_res *res = &calldata->res.seq_res; 5269 struct nfs4_sequence_res *res = &calldata->res.seq_res;
5187 5270
5188 dprintk("--> %s\n", __func__); 5271 dprintk("--> %s\n", __func__);
5189 nfs41_sequence_done(clp, res, task->tk_status); 5272 if (!nfs41_sequence_done(task, res))
5190 switch (task->tk_status) { 5273 return;
5191 case 0:
5192 case -NFS4ERR_COMPLETE_ALREADY:
5193 break;
5194 case -NFS4ERR_BADSESSION:
5195 case -NFS4ERR_DEADSESSION:
5196 /*
5197 * Handle the session error, but do not retry the operation, as
5198 * we have no way of telling whether the clientid had to be
5199 * reset before we got our reply. If reset, a new wave of
5200 * reclaim operations will follow, containing their own reclaim
5201 * complete. We don't want our retry to get on the way of
5202 * recovery by incorrectly indicating to the server that we're
5203 * done reclaiming state since the process had to be restarted.
5204 */
5205 _nfs4_async_handle_error(task, NULL, clp, NULL);
5206 break;
5207 default:
5208 if (_nfs4_async_handle_error(
5209 task, NULL, clp, NULL) == -EAGAIN) {
5210 rpc_restart_call_prepare(task);
5211 return;
5212 }
5213 }
5214 5274
5275 if (nfs41_reclaim_complete_handle_errors(task, clp) == -EAGAIN) {
5276 rpc_restart_call_prepare(task);
5277 return;
5278 }
5215 dprintk("<-- %s\n", __func__); 5279 dprintk("<-- %s\n", __func__);
5216} 5280}
5217 5281
@@ -5325,28 +5389,30 @@ struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = {
5325}; 5389};
5326#endif 5390#endif
5327 5391
5328/* 5392static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
5329 * Per minor version reboot and network partition recovery ops 5393 .minor_version = 0,
5330 */ 5394 .call_sync = _nfs4_call_sync,
5331 5395 .validate_stateid = nfs4_validate_delegation_stateid,
5332struct nfs4_state_recovery_ops *nfs4_reboot_recovery_ops[] = { 5396 .reboot_recovery_ops = &nfs40_reboot_recovery_ops,
5333 &nfs40_reboot_recovery_ops, 5397 .nograce_recovery_ops = &nfs40_nograce_recovery_ops,
5334#if defined(CONFIG_NFS_V4_1) 5398 .state_renewal_ops = &nfs40_state_renewal_ops,
5335 &nfs41_reboot_recovery_ops,
5336#endif
5337}; 5399};
5338 5400
5339struct nfs4_state_recovery_ops *nfs4_nograce_recovery_ops[] = {
5340 &nfs40_nograce_recovery_ops,
5341#if defined(CONFIG_NFS_V4_1) 5401#if defined(CONFIG_NFS_V4_1)
5342 &nfs41_nograce_recovery_ops, 5402static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
5343#endif 5403 .minor_version = 1,
5404 .call_sync = _nfs4_call_sync_session,
5405 .validate_stateid = nfs41_validate_delegation_stateid,
5406 .reboot_recovery_ops = &nfs41_reboot_recovery_ops,
5407 .nograce_recovery_ops = &nfs41_nograce_recovery_ops,
5408 .state_renewal_ops = &nfs41_state_renewal_ops,
5344}; 5409};
5410#endif
5345 5411
5346struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[] = { 5412const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = {
5347 &nfs40_state_renewal_ops, 5413 [0] = &nfs_v4_0_minor_ops,
5348#if defined(CONFIG_NFS_V4_1) 5414#if defined(CONFIG_NFS_V4_1)
5349 &nfs41_state_renewal_ops, 5415 [1] = &nfs_v4_1_minor_ops,
5350#endif 5416#endif
5351}; 5417};
5352 5418
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index d87f10327b72..72b6c580af13 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -54,14 +54,14 @@
54void 54void
55nfs4_renew_state(struct work_struct *work) 55nfs4_renew_state(struct work_struct *work)
56{ 56{
57 struct nfs4_state_maintenance_ops *ops; 57 const struct nfs4_state_maintenance_ops *ops;
58 struct nfs_client *clp = 58 struct nfs_client *clp =
59 container_of(work, struct nfs_client, cl_renewd.work); 59 container_of(work, struct nfs_client, cl_renewd.work);
60 struct rpc_cred *cred; 60 struct rpc_cred *cred;
61 long lease; 61 long lease;
62 unsigned long last, now; 62 unsigned long last, now;
63 63
64 ops = nfs4_state_renewal_ops[clp->cl_minorversion]; 64 ops = clp->cl_mvops->state_renewal_ops;
65 dprintk("%s: start\n", __func__); 65 dprintk("%s: start\n", __func__);
66 /* Are there any active superblocks? */ 66 /* Are there any active superblocks? */
67 if (list_empty(&clp->cl_superblocks)) 67 if (list_empty(&clp->cl_superblocks))
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 34acf5926fdc..3e2f19b04c06 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -145,7 +145,9 @@ static void nfs4_end_drain_session(struct nfs_client *clp)
145 struct nfs4_session *ses = clp->cl_session; 145 struct nfs4_session *ses = clp->cl_session;
146 int max_slots; 146 int max_slots;
147 147
148 if (test_and_clear_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state)) { 148 if (ses == NULL)
149 return;
150 if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
149 spin_lock(&ses->fc_slot_table.slot_tbl_lock); 151 spin_lock(&ses->fc_slot_table.slot_tbl_lock);
150 max_slots = ses->fc_slot_table.max_slots; 152 max_slots = ses->fc_slot_table.max_slots;
151 while (max_slots--) { 153 while (max_slots--) {
@@ -167,7 +169,7 @@ static int nfs4_begin_drain_session(struct nfs_client *clp)
167 struct nfs4_slot_table *tbl = &ses->fc_slot_table; 169 struct nfs4_slot_table *tbl = &ses->fc_slot_table;
168 170
169 spin_lock(&tbl->slot_tbl_lock); 171 spin_lock(&tbl->slot_tbl_lock);
170 set_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state); 172 set_bit(NFS4_SESSION_DRAINING, &ses->session_state);
171 if (tbl->highest_used_slotid != -1) { 173 if (tbl->highest_used_slotid != -1) {
172 INIT_COMPLETION(ses->complete); 174 INIT_COMPLETION(ses->complete);
173 spin_unlock(&tbl->slot_tbl_lock); 175 spin_unlock(&tbl->slot_tbl_lock);
@@ -371,7 +373,6 @@ nfs4_alloc_state_owner(void)
371 return NULL; 373 return NULL;
372 spin_lock_init(&sp->so_lock); 374 spin_lock_init(&sp->so_lock);
373 INIT_LIST_HEAD(&sp->so_states); 375 INIT_LIST_HEAD(&sp->so_states);
374 INIT_LIST_HEAD(&sp->so_delegations);
375 rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue"); 376 rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue");
376 sp->so_seqid.sequence = &sp->so_sequence; 377 sp->so_seqid.sequence = &sp->so_sequence;
377 spin_lock_init(&sp->so_sequence.lock); 378 spin_lock_init(&sp->so_sequence.lock);
@@ -384,7 +385,7 @@ static void
384nfs4_drop_state_owner(struct nfs4_state_owner *sp) 385nfs4_drop_state_owner(struct nfs4_state_owner *sp)
385{ 386{
386 if (!RB_EMPTY_NODE(&sp->so_client_node)) { 387 if (!RB_EMPTY_NODE(&sp->so_client_node)) {
387 struct nfs_client *clp = sp->so_client; 388 struct nfs_client *clp = sp->so_server->nfs_client;
388 389
389 spin_lock(&clp->cl_lock); 390 spin_lock(&clp->cl_lock);
390 rb_erase(&sp->so_client_node, &clp->cl_state_owners); 391 rb_erase(&sp->so_client_node, &clp->cl_state_owners);
@@ -406,7 +407,6 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
406 new = nfs4_alloc_state_owner(); 407 new = nfs4_alloc_state_owner();
407 if (new == NULL) 408 if (new == NULL)
408 return NULL; 409 return NULL;
409 new->so_client = clp;
410 new->so_server = server; 410 new->so_server = server;
411 new->so_cred = cred; 411 new->so_cred = cred;
412 spin_lock(&clp->cl_lock); 412 spin_lock(&clp->cl_lock);
@@ -423,7 +423,7 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
423 423
424void nfs4_put_state_owner(struct nfs4_state_owner *sp) 424void nfs4_put_state_owner(struct nfs4_state_owner *sp)
425{ 425{
426 struct nfs_client *clp = sp->so_client; 426 struct nfs_client *clp = sp->so_server->nfs_client;
427 struct rpc_cred *cred = sp->so_cred; 427 struct rpc_cred *cred = sp->so_cred;
428 428
429 if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock)) 429 if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
@@ -602,12 +602,21 @@ void nfs4_close_sync(struct path *path, struct nfs4_state *state, fmode_t fmode)
602 * that is compatible with current->files 602 * that is compatible with current->files
603 */ 603 */
604static struct nfs4_lock_state * 604static struct nfs4_lock_state *
605__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) 605__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type)
606{ 606{
607 struct nfs4_lock_state *pos; 607 struct nfs4_lock_state *pos;
608 list_for_each_entry(pos, &state->lock_states, ls_locks) { 608 list_for_each_entry(pos, &state->lock_states, ls_locks) {
609 if (pos->ls_owner != fl_owner) 609 if (type != NFS4_ANY_LOCK_TYPE && pos->ls_owner.lo_type != type)
610 continue; 610 continue;
611 switch (pos->ls_owner.lo_type) {
612 case NFS4_POSIX_LOCK_TYPE:
613 if (pos->ls_owner.lo_u.posix_owner != fl_owner)
614 continue;
615 break;
616 case NFS4_FLOCK_LOCK_TYPE:
617 if (pos->ls_owner.lo_u.flock_owner != fl_pid)
618 continue;
619 }
611 atomic_inc(&pos->ls_count); 620 atomic_inc(&pos->ls_count);
612 return pos; 621 return pos;
613 } 622 }
@@ -619,10 +628,10 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
619 * exists, return an uninitialized one. 628 * exists, return an uninitialized one.
620 * 629 *
621 */ 630 */
622static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) 631static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type)
623{ 632{
624 struct nfs4_lock_state *lsp; 633 struct nfs4_lock_state *lsp;
625 struct nfs_client *clp = state->owner->so_client; 634 struct nfs_client *clp = state->owner->so_server->nfs_client;
626 635
627 lsp = kzalloc(sizeof(*lsp), GFP_NOFS); 636 lsp = kzalloc(sizeof(*lsp), GFP_NOFS);
628 if (lsp == NULL) 637 if (lsp == NULL)
@@ -633,7 +642,18 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
633 lsp->ls_seqid.sequence = &lsp->ls_sequence; 642 lsp->ls_seqid.sequence = &lsp->ls_sequence;
634 atomic_set(&lsp->ls_count, 1); 643 atomic_set(&lsp->ls_count, 1);
635 lsp->ls_state = state; 644 lsp->ls_state = state;
636 lsp->ls_owner = fl_owner; 645 lsp->ls_owner.lo_type = type;
646 switch (lsp->ls_owner.lo_type) {
647 case NFS4_FLOCK_LOCK_TYPE:
648 lsp->ls_owner.lo_u.flock_owner = fl_pid;
649 break;
650 case NFS4_POSIX_LOCK_TYPE:
651 lsp->ls_owner.lo_u.posix_owner = fl_owner;
652 break;
653 default:
654 kfree(lsp);
655 return NULL;
656 }
637 spin_lock(&clp->cl_lock); 657 spin_lock(&clp->cl_lock);
638 nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64); 658 nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64);
639 spin_unlock(&clp->cl_lock); 659 spin_unlock(&clp->cl_lock);
@@ -643,7 +663,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
643 663
644static void nfs4_free_lock_state(struct nfs4_lock_state *lsp) 664static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
645{ 665{
646 struct nfs_client *clp = lsp->ls_state->owner->so_client; 666 struct nfs_client *clp = lsp->ls_state->owner->so_server->nfs_client;
647 667
648 spin_lock(&clp->cl_lock); 668 spin_lock(&clp->cl_lock);
649 nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id); 669 nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id);
@@ -657,13 +677,13 @@ static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
657 * exists, return an uninitialized one. 677 * exists, return an uninitialized one.
658 * 678 *
659 */ 679 */
660static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner) 680static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner, pid_t pid, unsigned int type)
661{ 681{
662 struct nfs4_lock_state *lsp, *new = NULL; 682 struct nfs4_lock_state *lsp, *new = NULL;
663 683
664 for(;;) { 684 for(;;) {
665 spin_lock(&state->state_lock); 685 spin_lock(&state->state_lock);
666 lsp = __nfs4_find_lock_state(state, owner); 686 lsp = __nfs4_find_lock_state(state, owner, pid, type);
667 if (lsp != NULL) 687 if (lsp != NULL)
668 break; 688 break;
669 if (new != NULL) { 689 if (new != NULL) {
@@ -674,7 +694,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_
674 break; 694 break;
675 } 695 }
676 spin_unlock(&state->state_lock); 696 spin_unlock(&state->state_lock);
677 new = nfs4_alloc_lock_state(state, owner); 697 new = nfs4_alloc_lock_state(state, owner, pid, type);
678 if (new == NULL) 698 if (new == NULL)
679 return NULL; 699 return NULL;
680 } 700 }
@@ -701,6 +721,8 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
701 if (list_empty(&state->lock_states)) 721 if (list_empty(&state->lock_states))
702 clear_bit(LK_STATE_IN_USE, &state->flags); 722 clear_bit(LK_STATE_IN_USE, &state->flags);
703 spin_unlock(&state->state_lock); 723 spin_unlock(&state->state_lock);
724 if (lsp->ls_flags & NFS_LOCK_INITIALIZED)
725 nfs4_release_lockowner(lsp);
704 nfs4_free_lock_state(lsp); 726 nfs4_free_lock_state(lsp);
705} 727}
706 728
@@ -728,7 +750,12 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
728 750
729 if (fl->fl_ops != NULL) 751 if (fl->fl_ops != NULL)
730 return 0; 752 return 0;
731 lsp = nfs4_get_lock_state(state, fl->fl_owner); 753 if (fl->fl_flags & FL_POSIX)
754 lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE);
755 else if (fl->fl_flags & FL_FLOCK)
756 lsp = nfs4_get_lock_state(state, 0, fl->fl_pid, NFS4_FLOCK_LOCK_TYPE);
757 else
758 return -EINVAL;
732 if (lsp == NULL) 759 if (lsp == NULL)
733 return -ENOMEM; 760 return -ENOMEM;
734 fl->fl_u.nfs4_fl.owner = lsp; 761 fl->fl_u.nfs4_fl.owner = lsp;
@@ -740,7 +767,7 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
740 * Byte-range lock aware utility to initialize the stateid of read/write 767 * Byte-range lock aware utility to initialize the stateid of read/write
741 * requests. 768 * requests.
742 */ 769 */
743void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner) 770void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid)
744{ 771{
745 struct nfs4_lock_state *lsp; 772 struct nfs4_lock_state *lsp;
746 int seq; 773 int seq;
@@ -753,7 +780,7 @@ void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t f
753 return; 780 return;
754 781
755 spin_lock(&state->state_lock); 782 spin_lock(&state->state_lock);
756 lsp = __nfs4_find_lock_state(state, fl_owner); 783 lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE);
757 if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) 784 if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
758 memcpy(dst, &lsp->ls_stateid, sizeof(*dst)); 785 memcpy(dst, &lsp->ls_stateid, sizeof(*dst));
759 spin_unlock(&state->state_lock); 786 spin_unlock(&state->state_lock);
@@ -1041,11 +1068,11 @@ restart:
1041 case -NFS4ERR_BAD_STATEID: 1068 case -NFS4ERR_BAD_STATEID:
1042 case -NFS4ERR_RECLAIM_BAD: 1069 case -NFS4ERR_RECLAIM_BAD:
1043 case -NFS4ERR_RECLAIM_CONFLICT: 1070 case -NFS4ERR_RECLAIM_CONFLICT:
1044 nfs4_state_mark_reclaim_nograce(sp->so_client, state); 1071 nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
1045 break; 1072 break;
1046 case -NFS4ERR_EXPIRED: 1073 case -NFS4ERR_EXPIRED:
1047 case -NFS4ERR_NO_GRACE: 1074 case -NFS4ERR_NO_GRACE:
1048 nfs4_state_mark_reclaim_nograce(sp->so_client, state); 1075 nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
1049 case -NFS4ERR_STALE_CLIENTID: 1076 case -NFS4ERR_STALE_CLIENTID:
1050 case -NFS4ERR_BADSESSION: 1077 case -NFS4ERR_BADSESSION:
1051 case -NFS4ERR_BADSLOT: 1078 case -NFS4ERR_BADSLOT:
@@ -1120,8 +1147,7 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
1120 if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) 1147 if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
1121 return; 1148 return;
1122 1149
1123 nfs4_reclaim_complete(clp, 1150 nfs4_reclaim_complete(clp, clp->cl_mvops->reboot_recovery_ops);
1124 nfs4_reboot_recovery_ops[clp->cl_minorversion]);
1125 1151
1126 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { 1152 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
1127 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); 1153 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
@@ -1211,8 +1237,8 @@ restart:
1211static int nfs4_check_lease(struct nfs_client *clp) 1237static int nfs4_check_lease(struct nfs_client *clp)
1212{ 1238{
1213 struct rpc_cred *cred; 1239 struct rpc_cred *cred;
1214 struct nfs4_state_maintenance_ops *ops = 1240 const struct nfs4_state_maintenance_ops *ops =
1215 nfs4_state_renewal_ops[clp->cl_minorversion]; 1241 clp->cl_mvops->state_renewal_ops;
1216 int status = -NFS4ERR_EXPIRED; 1242 int status = -NFS4ERR_EXPIRED;
1217 1243
1218 /* Is the client already known to have an expired lease? */ 1244 /* Is the client already known to have an expired lease? */
@@ -1235,8 +1261,8 @@ out:
1235static int nfs4_reclaim_lease(struct nfs_client *clp) 1261static int nfs4_reclaim_lease(struct nfs_client *clp)
1236{ 1262{
1237 struct rpc_cred *cred; 1263 struct rpc_cred *cred;
1238 struct nfs4_state_recovery_ops *ops = 1264 const struct nfs4_state_recovery_ops *ops =
1239 nfs4_reboot_recovery_ops[clp->cl_minorversion]; 1265 clp->cl_mvops->reboot_recovery_ops;
1240 int status = -ENOENT; 1266 int status = -ENOENT;
1241 1267
1242 cred = ops->get_clid_cred(clp); 1268 cred = ops->get_clid_cred(clp);
@@ -1444,7 +1470,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
1444 /* First recover reboot state... */ 1470 /* First recover reboot state... */
1445 if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) { 1471 if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
1446 status = nfs4_do_reclaim(clp, 1472 status = nfs4_do_reclaim(clp,
1447 nfs4_reboot_recovery_ops[clp->cl_minorversion]); 1473 clp->cl_mvops->reboot_recovery_ops);
1448 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) || 1474 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) ||
1449 test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) 1475 test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state))
1450 continue; 1476 continue;
@@ -1458,7 +1484,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
1458 /* Now recover expired state... */ 1484 /* Now recover expired state... */
1459 if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) { 1485 if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) {
1460 status = nfs4_do_reclaim(clp, 1486 status = nfs4_do_reclaim(clp,
1461 nfs4_nograce_recovery_ops[clp->cl_minorversion]); 1487 clp->cl_mvops->nograce_recovery_ops);
1462 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) || 1488 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) ||
1463 test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) || 1489 test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) ||
1464 test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) 1490 test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 65c8dae4b267..08ef91291132 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -202,14 +202,17 @@ static int nfs4_stat_to_errno(int);
202#define encode_link_maxsz (op_encode_hdr_maxsz + \ 202#define encode_link_maxsz (op_encode_hdr_maxsz + \
203 nfs4_name_maxsz) 203 nfs4_name_maxsz)
204#define decode_link_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz) 204#define decode_link_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz)
205#define encode_lockowner_maxsz (7)
205#define encode_lock_maxsz (op_encode_hdr_maxsz + \ 206#define encode_lock_maxsz (op_encode_hdr_maxsz + \
206 7 + \ 207 7 + \
207 1 + encode_stateid_maxsz + 8) 208 1 + encode_stateid_maxsz + 1 + \
209 encode_lockowner_maxsz)
208#define decode_lock_denied_maxsz \ 210#define decode_lock_denied_maxsz \
209 (8 + decode_lockowner_maxsz) 211 (8 + decode_lockowner_maxsz)
210#define decode_lock_maxsz (op_decode_hdr_maxsz + \ 212#define decode_lock_maxsz (op_decode_hdr_maxsz + \
211 decode_lock_denied_maxsz) 213 decode_lock_denied_maxsz)
212#define encode_lockt_maxsz (op_encode_hdr_maxsz + 12) 214#define encode_lockt_maxsz (op_encode_hdr_maxsz + 5 + \
215 encode_lockowner_maxsz)
213#define decode_lockt_maxsz (op_decode_hdr_maxsz + \ 216#define decode_lockt_maxsz (op_decode_hdr_maxsz + \
214 decode_lock_denied_maxsz) 217 decode_lock_denied_maxsz)
215#define encode_locku_maxsz (op_encode_hdr_maxsz + 3 + \ 218#define encode_locku_maxsz (op_encode_hdr_maxsz + 3 + \
@@ -217,6 +220,11 @@ static int nfs4_stat_to_errno(int);
217 4) 220 4)
218#define decode_locku_maxsz (op_decode_hdr_maxsz + \ 221#define decode_locku_maxsz (op_decode_hdr_maxsz + \
219 decode_stateid_maxsz) 222 decode_stateid_maxsz)
223#define encode_release_lockowner_maxsz \
224 (op_encode_hdr_maxsz + \
225 encode_lockowner_maxsz)
226#define decode_release_lockowner_maxsz \
227 (op_decode_hdr_maxsz)
220#define encode_access_maxsz (op_encode_hdr_maxsz + 1) 228#define encode_access_maxsz (op_encode_hdr_maxsz + 1)
221#define decode_access_maxsz (op_decode_hdr_maxsz + 2) 229#define decode_access_maxsz (op_decode_hdr_maxsz + 2)
222#define encode_symlink_maxsz (op_encode_hdr_maxsz + \ 230#define encode_symlink_maxsz (op_encode_hdr_maxsz + \
@@ -471,6 +479,12 @@ static int nfs4_stat_to_errno(int);
471 decode_sequence_maxsz + \ 479 decode_sequence_maxsz + \
472 decode_putfh_maxsz + \ 480 decode_putfh_maxsz + \
473 decode_locku_maxsz) 481 decode_locku_maxsz)
482#define NFS4_enc_release_lockowner_sz \
483 (compound_encode_hdr_maxsz + \
484 encode_lockowner_maxsz)
485#define NFS4_dec_release_lockowner_sz \
486 (compound_decode_hdr_maxsz + \
487 decode_lockowner_maxsz)
474#define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \ 488#define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \
475 encode_sequence_maxsz + \ 489 encode_sequence_maxsz + \
476 encode_putfh_maxsz + \ 490 encode_putfh_maxsz + \
@@ -744,7 +758,7 @@ static void encode_compound_hdr(struct xdr_stream *xdr,
744 struct compound_hdr *hdr) 758 struct compound_hdr *hdr)
745{ 759{
746 __be32 *p; 760 __be32 *p;
747 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; 761 struct rpc_auth *auth = req->rq_cred->cr_auth;
748 762
749 /* initialize running count of expected bytes in reply. 763 /* initialize running count of expected bytes in reply.
750 * NOTE: the replied tag SHOULD be the same is the one sent, 764 * NOTE: the replied tag SHOULD be the same is the one sent,
@@ -1042,6 +1056,17 @@ static inline uint64_t nfs4_lock_length(struct file_lock *fl)
1042 return fl->fl_end - fl->fl_start + 1; 1056 return fl->fl_end - fl->fl_start + 1;
1043} 1057}
1044 1058
1059static void encode_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner)
1060{
1061 __be32 *p;
1062
1063 p = reserve_space(xdr, 28);
1064 p = xdr_encode_hyper(p, lowner->clientid);
1065 *p++ = cpu_to_be32(16);
1066 p = xdr_encode_opaque_fixed(p, "lock id:", 8);
1067 xdr_encode_hyper(p, lowner->id);
1068}
1069
1045/* 1070/*
1046 * opcode,type,reclaim,offset,length,new_lock_owner = 32 1071 * opcode,type,reclaim,offset,length,new_lock_owner = 32
1047 * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40 1072 * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40
@@ -1058,14 +1083,11 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args
1058 p = xdr_encode_hyper(p, nfs4_lock_length(args->fl)); 1083 p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
1059 *p = cpu_to_be32(args->new_lock_owner); 1084 *p = cpu_to_be32(args->new_lock_owner);
1060 if (args->new_lock_owner){ 1085 if (args->new_lock_owner){
1061 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+32); 1086 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
1062 *p++ = cpu_to_be32(args->open_seqid->sequence->counter); 1087 *p++ = cpu_to_be32(args->open_seqid->sequence->counter);
1063 p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE); 1088 p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE);
1064 *p++ = cpu_to_be32(args->lock_seqid->sequence->counter); 1089 *p++ = cpu_to_be32(args->lock_seqid->sequence->counter);
1065 p = xdr_encode_hyper(p, args->lock_owner.clientid); 1090 encode_lockowner(xdr, &args->lock_owner);
1066 *p++ = cpu_to_be32(16);
1067 p = xdr_encode_opaque_fixed(p, "lock id:", 8);
1068 xdr_encode_hyper(p, args->lock_owner.id);
1069 } 1091 }
1070 else { 1092 else {
1071 p = reserve_space(xdr, NFS4_STATEID_SIZE+4); 1093 p = reserve_space(xdr, NFS4_STATEID_SIZE+4);
@@ -1080,15 +1102,12 @@ static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *ar
1080{ 1102{
1081 __be32 *p; 1103 __be32 *p;
1082 1104
1083 p = reserve_space(xdr, 52); 1105 p = reserve_space(xdr, 24);
1084 *p++ = cpu_to_be32(OP_LOCKT); 1106 *p++ = cpu_to_be32(OP_LOCKT);
1085 *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0)); 1107 *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
1086 p = xdr_encode_hyper(p, args->fl->fl_start); 1108 p = xdr_encode_hyper(p, args->fl->fl_start);
1087 p = xdr_encode_hyper(p, nfs4_lock_length(args->fl)); 1109 p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
1088 p = xdr_encode_hyper(p, args->lock_owner.clientid); 1110 encode_lockowner(xdr, &args->lock_owner);
1089 *p++ = cpu_to_be32(16);
1090 p = xdr_encode_opaque_fixed(p, "lock id:", 8);
1091 xdr_encode_hyper(p, args->lock_owner.id);
1092 hdr->nops++; 1111 hdr->nops++;
1093 hdr->replen += decode_lockt_maxsz; 1112 hdr->replen += decode_lockt_maxsz;
1094} 1113}
@@ -1108,6 +1127,17 @@ static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *ar
1108 hdr->replen += decode_locku_maxsz; 1127 hdr->replen += decode_locku_maxsz;
1109} 1128}
1110 1129
1130static void encode_release_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner, struct compound_hdr *hdr)
1131{
1132 __be32 *p;
1133
1134 p = reserve_space(xdr, 4);
1135 *p = cpu_to_be32(OP_RELEASE_LOCKOWNER);
1136 encode_lockowner(xdr, lowner);
1137 hdr->nops++;
1138 hdr->replen += decode_release_lockowner_maxsz;
1139}
1140
1111static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) 1141static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
1112{ 1142{
1113 int len = name->len; 1143 int len = name->len;
@@ -1172,7 +1202,7 @@ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_op
1172 break; 1202 break;
1173 default: 1203 default:
1174 clp = arg->server->nfs_client; 1204 clp = arg->server->nfs_client;
1175 if (clp->cl_minorversion > 0) { 1205 if (clp->cl_mvops->minor_version > 0) {
1176 if (nfs4_has_persistent_session(clp)) { 1206 if (nfs4_has_persistent_session(clp)) {
1177 *p = cpu_to_be32(NFS4_CREATE_GUARDED); 1207 *p = cpu_to_be32(NFS4_CREATE_GUARDED);
1178 encode_attrs(xdr, arg->u.attrs, arg->server); 1208 encode_attrs(xdr, arg->u.attrs, arg->server);
@@ -1324,14 +1354,14 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1324 hdr->replen += decode_putrootfh_maxsz; 1354 hdr->replen += decode_putrootfh_maxsz;
1325} 1355}
1326 1356
1327static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx) 1357static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx)
1328{ 1358{
1329 nfs4_stateid stateid; 1359 nfs4_stateid stateid;
1330 __be32 *p; 1360 __be32 *p;
1331 1361
1332 p = reserve_space(xdr, NFS4_STATEID_SIZE); 1362 p = reserve_space(xdr, NFS4_STATEID_SIZE);
1333 if (ctx->state != NULL) { 1363 if (ctx->state != NULL) {
1334 nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner); 1364 nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid);
1335 xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE); 1365 xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE);
1336 } else 1366 } else
1337 xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE); 1367 xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
@@ -1344,7 +1374,7 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args,
1344 p = reserve_space(xdr, 4); 1374 p = reserve_space(xdr, 4);
1345 *p = cpu_to_be32(OP_READ); 1375 *p = cpu_to_be32(OP_READ);
1346 1376
1347 encode_stateid(xdr, args->context); 1377 encode_stateid(xdr, args->context, args->lock_context);
1348 1378
1349 p = reserve_space(xdr, 12); 1379 p = reserve_space(xdr, 12);
1350 p = xdr_encode_hyper(p, args->offset); 1380 p = xdr_encode_hyper(p, args->offset);
@@ -1523,7 +1553,7 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg
1523 p = reserve_space(xdr, 4); 1553 p = reserve_space(xdr, 4);
1524 *p = cpu_to_be32(OP_WRITE); 1554 *p = cpu_to_be32(OP_WRITE);
1525 1555
1526 encode_stateid(xdr, args->context); 1556 encode_stateid(xdr, args->context, args->lock_context);
1527 1557
1528 p = reserve_space(xdr, 16); 1558 p = reserve_space(xdr, 16);
1529 p = xdr_encode_hyper(p, args->offset); 1559 p = xdr_encode_hyper(p, args->offset);
@@ -1704,7 +1734,7 @@ static u32 nfs4_xdr_minorversion(const struct nfs4_sequence_args *args)
1704{ 1734{
1705#if defined(CONFIG_NFS_V4_1) 1735#if defined(CONFIG_NFS_V4_1)
1706 if (args->sa_session) 1736 if (args->sa_session)
1707 return args->sa_session->clp->cl_minorversion; 1737 return args->sa_session->clp->cl_mvops->minor_version;
1708#endif /* CONFIG_NFS_V4_1 */ 1738#endif /* CONFIG_NFS_V4_1 */
1709 return 0; 1739 return 0;
1710} 1740}
@@ -2048,6 +2078,20 @@ static int nfs4_xdr_enc_locku(struct rpc_rqst *req, __be32 *p, struct nfs_locku_
2048 return 0; 2078 return 0;
2049} 2079}
2050 2080
2081static int nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req, __be32 *p, struct nfs_release_lockowner_args *args)
2082{
2083 struct xdr_stream xdr;
2084 struct compound_hdr hdr = {
2085 .minorversion = 0,
2086 };
2087
2088 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2089 encode_compound_hdr(&xdr, req, &hdr);
2090 encode_release_lockowner(&xdr, &args->lock_owner, &hdr);
2091 encode_nops(&hdr);
2092 return 0;
2093}
2094
2051/* 2095/*
2052 * Encode a READLINK request 2096 * Encode a READLINK request
2053 */ 2097 */
@@ -2395,7 +2439,7 @@ static int nfs4_xdr_enc_exchange_id(struct rpc_rqst *req, uint32_t *p,
2395{ 2439{
2396 struct xdr_stream xdr; 2440 struct xdr_stream xdr;
2397 struct compound_hdr hdr = { 2441 struct compound_hdr hdr = {
2398 .minorversion = args->client->cl_minorversion, 2442 .minorversion = args->client->cl_mvops->minor_version,
2399 }; 2443 };
2400 2444
2401 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2445 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
@@ -2413,7 +2457,7 @@ static int nfs4_xdr_enc_create_session(struct rpc_rqst *req, uint32_t *p,
2413{ 2457{
2414 struct xdr_stream xdr; 2458 struct xdr_stream xdr;
2415 struct compound_hdr hdr = { 2459 struct compound_hdr hdr = {
2416 .minorversion = args->client->cl_minorversion, 2460 .minorversion = args->client->cl_mvops->minor_version,
2417 }; 2461 };
2418 2462
2419 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2463 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
@@ -2431,7 +2475,7 @@ static int nfs4_xdr_enc_destroy_session(struct rpc_rqst *req, uint32_t *p,
2431{ 2475{
2432 struct xdr_stream xdr; 2476 struct xdr_stream xdr;
2433 struct compound_hdr hdr = { 2477 struct compound_hdr hdr = {
2434 .minorversion = session->clp->cl_minorversion, 2478 .minorversion = session->clp->cl_mvops->minor_version,
2435 }; 2479 };
2436 2480
2437 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2481 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
@@ -3973,6 +4017,11 @@ static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res)
3973 return status; 4017 return status;
3974} 4018}
3975 4019
4020static int decode_release_lockowner(struct xdr_stream *xdr)
4021{
4022 return decode_op_hdr(xdr, OP_RELEASE_LOCKOWNER);
4023}
4024
3976static int decode_lookup(struct xdr_stream *xdr) 4025static int decode_lookup(struct xdr_stream *xdr)
3977{ 4026{
3978 return decode_op_hdr(xdr, OP_LOOKUP); 4027 return decode_op_hdr(xdr, OP_LOOKUP);
@@ -5259,6 +5308,19 @@ out:
5259 return status; 5308 return status;
5260} 5309}
5261 5310
5311static int nfs4_xdr_dec_release_lockowner(struct rpc_rqst *rqstp, __be32 *p, void *dummy)
5312{
5313 struct xdr_stream xdr;
5314 struct compound_hdr hdr;
5315 int status;
5316
5317 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
5318 status = decode_compound_hdr(&xdr, &hdr);
5319 if (!status)
5320 status = decode_release_lockowner(&xdr);
5321 return status;
5322}
5323
5262/* 5324/*
5263 * Decode READLINK response 5325 * Decode READLINK response
5264 */ 5326 */
@@ -5866,6 +5928,7 @@ struct rpc_procinfo nfs4_procedures[] = {
5866 PROC(GETACL, enc_getacl, dec_getacl), 5928 PROC(GETACL, enc_getacl, dec_getacl),
5867 PROC(SETACL, enc_setacl, dec_setacl), 5929 PROC(SETACL, enc_setacl, dec_setacl),
5868 PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations), 5930 PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations),
5931 PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner),
5869#if defined(CONFIG_NFS_V4_1) 5932#if defined(CONFIG_NFS_V4_1)
5870 PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id), 5933 PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id),
5871 PROC(CREATE_SESSION, enc_create_session, dec_create_session), 5934 PROC(CREATE_SESSION, enc_create_session, dec_create_session),
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 6bd19d843af7..df101d9f546a 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -105,7 +105,7 @@ static char nfs_root_name[256] __initdata = "";
105static __be32 servaddr __initdata = 0; 105static __be32 servaddr __initdata = 0;
106 106
107/* Name of directory to mount */ 107/* Name of directory to mount */
108static char nfs_export_path[NFS_MAXPATHLEN] __initdata = { 0, }; 108static char nfs_export_path[NFS_MAXPATHLEN + 1] __initdata = { 0, };
109 109
110/* NFS-related data */ 110/* NFS-related data */
111static struct nfs_mount_data nfs_data __initdata = { 0, };/* NFS mount info */ 111static struct nfs_mount_data nfs_data __initdata = { 0, };/* NFS mount info */
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index a3654e57b589..919490232e17 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -79,6 +79,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
79 req->wb_pgbase = offset; 79 req->wb_pgbase = offset;
80 req->wb_bytes = count; 80 req->wb_bytes = count;
81 req->wb_context = get_nfs_open_context(ctx); 81 req->wb_context = get_nfs_open_context(ctx);
82 req->wb_lock_context = nfs_get_lock_context(ctx);
82 kref_init(&req->wb_kref); 83 kref_init(&req->wb_kref);
83 return req; 84 return req;
84} 85}
@@ -141,11 +142,16 @@ void nfs_clear_request(struct nfs_page *req)
141{ 142{
142 struct page *page = req->wb_page; 143 struct page *page = req->wb_page;
143 struct nfs_open_context *ctx = req->wb_context; 144 struct nfs_open_context *ctx = req->wb_context;
145 struct nfs_lock_context *l_ctx = req->wb_lock_context;
144 146
145 if (page != NULL) { 147 if (page != NULL) {
146 page_cache_release(page); 148 page_cache_release(page);
147 req->wb_page = NULL; 149 req->wb_page = NULL;
148 } 150 }
151 if (l_ctx != NULL) {
152 nfs_put_lock_context(l_ctx);
153 req->wb_lock_context = NULL;
154 }
149 if (ctx != NULL) { 155 if (ctx != NULL) {
150 put_nfs_open_context(ctx); 156 put_nfs_open_context(ctx);
151 req->wb_context = NULL; 157 req->wb_context = NULL;
@@ -235,7 +241,7 @@ static int nfs_can_coalesce_requests(struct nfs_page *prev,
235{ 241{
236 if (req->wb_context->cred != prev->wb_context->cred) 242 if (req->wb_context->cred != prev->wb_context->cred)
237 return 0; 243 return 0;
238 if (req->wb_context->lockowner != prev->wb_context->lockowner) 244 if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner)
239 return 0; 245 return 0;
240 if (req->wb_context->state != prev->wb_context->state) 246 if (req->wb_context->state != prev->wb_context->state)
241 return 0; 247 return 0;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 6e2b06e6ca79..87adc2744246 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -190,6 +190,7 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
190 data->args.pages = data->pagevec; 190 data->args.pages = data->pagevec;
191 data->args.count = count; 191 data->args.count = count;
192 data->args.context = get_nfs_open_context(req->wb_context); 192 data->args.context = get_nfs_open_context(req->wb_context);
193 data->args.lock_context = req->wb_lock_context;
193 194
194 data->res.fattr = &data->fattr; 195 data->res.fattr = &data->fattr;
195 data->res.count = count; 196 data->res.count = count;
@@ -410,7 +411,7 @@ void nfs_read_prepare(struct rpc_task *task, void *calldata)
410{ 411{
411 struct nfs_read_data *data = calldata; 412 struct nfs_read_data *data = calldata;
412 413
413 if (nfs4_setup_sequence(NFS_SERVER(data->inode)->nfs_client, 414 if (nfs4_setup_sequence(NFS_SERVER(data->inode),
414 &data->args.seq_args, &data->res.seq_res, 415 &data->args.seq_args, &data->res.seq_res,
415 0, task)) 416 0, task))
416 return; 417 return;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index f9df16de4a56..ee26316ad1f4 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -270,7 +270,7 @@ static const struct super_operations nfs_sops = {
270 .write_inode = nfs_write_inode, 270 .write_inode = nfs_write_inode,
271 .put_super = nfs_put_super, 271 .put_super = nfs_put_super,
272 .statfs = nfs_statfs, 272 .statfs = nfs_statfs,
273 .clear_inode = nfs_clear_inode, 273 .evict_inode = nfs_evict_inode,
274 .umount_begin = nfs_umount_begin, 274 .umount_begin = nfs_umount_begin,
275 .show_options = nfs_show_options, 275 .show_options = nfs_show_options,
276 .show_stats = nfs_show_stats, 276 .show_stats = nfs_show_stats,
@@ -340,7 +340,7 @@ static const struct super_operations nfs4_sops = {
340 .write_inode = nfs_write_inode, 340 .write_inode = nfs_write_inode,
341 .put_super = nfs_put_super, 341 .put_super = nfs_put_super,
342 .statfs = nfs_statfs, 342 .statfs = nfs_statfs,
343 .clear_inode = nfs4_clear_inode, 343 .evict_inode = nfs4_evict_inode,
344 .umount_begin = nfs_umount_begin, 344 .umount_begin = nfs_umount_begin,
345 .show_options = nfs_show_options, 345 .show_options = nfs_show_options,
346 .show_stats = nfs_show_stats, 346 .show_stats = nfs_show_stats,
@@ -546,6 +546,9 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
546{ 546{
547 struct sockaddr *sap = (struct sockaddr *)&nfss->mountd_address; 547 struct sockaddr *sap = (struct sockaddr *)&nfss->mountd_address;
548 548
549 if (nfss->flags & NFS_MOUNT_LEGACY_INTERFACE)
550 return;
551
549 switch (sap->sa_family) { 552 switch (sap->sa_family) {
550 case AF_INET: { 553 case AF_INET: {
551 struct sockaddr_in *sin = (struct sockaddr_in *)sap; 554 struct sockaddr_in *sin = (struct sockaddr_in *)sap;
@@ -1780,6 +1783,7 @@ static int nfs_validate_mount_data(void *options,
1780 * can deal with. 1783 * can deal with.
1781 */ 1784 */
1782 args->flags = data->flags & NFS_MOUNT_FLAGMASK; 1785 args->flags = data->flags & NFS_MOUNT_FLAGMASK;
1786 args->flags |= NFS_MOUNT_LEGACY_INTERFACE;
1783 args->rsize = data->rsize; 1787 args->rsize = data->rsize;
1784 args->wsize = data->wsize; 1788 args->wsize = data->wsize;
1785 args->timeo = data->timeo; 1789 args->timeo = data->timeo;
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index a2242af6a17d..2f84adaad427 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -110,7 +110,7 @@ void nfs_unlink_prepare(struct rpc_task *task, void *calldata)
110 struct nfs_unlinkdata *data = calldata; 110 struct nfs_unlinkdata *data = calldata;
111 struct nfs_server *server = NFS_SERVER(data->dir); 111 struct nfs_server *server = NFS_SERVER(data->dir);
112 112
113 if (nfs4_setup_sequence(server->nfs_client, &data->args.seq_args, 113 if (nfs4_setup_sequence(server, &data->args.seq_args,
114 &data->res.seq_res, 1, task)) 114 &data->res.seq_res, 1, task))
115 return; 115 return;
116 rpc_call_start(task); 116 rpc_call_start(task);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 91679e2631ee..874972d9427c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -222,7 +222,7 @@ static void nfs_end_page_writeback(struct page *page)
222 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); 222 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
223} 223}
224 224
225static struct nfs_page *nfs_find_and_lock_request(struct page *page) 225static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock)
226{ 226{
227 struct inode *inode = page->mapping->host; 227 struct inode *inode = page->mapping->host;
228 struct nfs_page *req; 228 struct nfs_page *req;
@@ -241,7 +241,10 @@ static struct nfs_page *nfs_find_and_lock_request(struct page *page)
241 * request as dirty (in which case we don't care). 241 * request as dirty (in which case we don't care).
242 */ 242 */
243 spin_unlock(&inode->i_lock); 243 spin_unlock(&inode->i_lock);
244 ret = nfs_wait_on_request(req); 244 if (!nonblock)
245 ret = nfs_wait_on_request(req);
246 else
247 ret = -EAGAIN;
245 nfs_release_request(req); 248 nfs_release_request(req);
246 if (ret != 0) 249 if (ret != 0)
247 return ERR_PTR(ret); 250 return ERR_PTR(ret);
@@ -256,12 +259,12 @@ static struct nfs_page *nfs_find_and_lock_request(struct page *page)
256 * May return an error if the user signalled nfs_wait_on_request(). 259 * May return an error if the user signalled nfs_wait_on_request().
257 */ 260 */
258static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, 261static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
259 struct page *page) 262 struct page *page, bool nonblock)
260{ 263{
261 struct nfs_page *req; 264 struct nfs_page *req;
262 int ret = 0; 265 int ret = 0;
263 266
264 req = nfs_find_and_lock_request(page); 267 req = nfs_find_and_lock_request(page, nonblock);
265 if (!req) 268 if (!req)
266 goto out; 269 goto out;
267 ret = PTR_ERR(req); 270 ret = PTR_ERR(req);
@@ -283,12 +286,20 @@ out:
283static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) 286static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio)
284{ 287{
285 struct inode *inode = page->mapping->host; 288 struct inode *inode = page->mapping->host;
289 int ret;
286 290
287 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); 291 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
288 nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); 292 nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
289 293
290 nfs_pageio_cond_complete(pgio, page->index); 294 nfs_pageio_cond_complete(pgio, page->index);
291 return nfs_page_async_flush(pgio, page); 295 ret = nfs_page_async_flush(pgio, page,
296 wbc->sync_mode == WB_SYNC_NONE ||
297 wbc->nonblocking != 0);
298 if (ret == -EAGAIN) {
299 redirty_page_for_writepage(wbc, page);
300 ret = 0;
301 }
302 return ret;
292} 303}
293 304
294/* 305/*
@@ -689,7 +700,9 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
689 req = nfs_page_find_request(page); 700 req = nfs_page_find_request(page);
690 if (req == NULL) 701 if (req == NULL)
691 return 0; 702 return 0;
692 do_flush = req->wb_page != page || req->wb_context != ctx; 703 do_flush = req->wb_page != page || req->wb_context != ctx ||
704 req->wb_lock_context->lockowner != current->files ||
705 req->wb_lock_context->pid != current->tgid;
693 nfs_release_request(req); 706 nfs_release_request(req);
694 if (!do_flush) 707 if (!do_flush)
695 return 0; 708 return 0;
@@ -813,6 +826,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
813 data->args.pages = data->pagevec; 826 data->args.pages = data->pagevec;
814 data->args.count = count; 827 data->args.count = count;
815 data->args.context = get_nfs_open_context(req->wb_context); 828 data->args.context = get_nfs_open_context(req->wb_context);
829 data->args.lock_context = req->wb_lock_context;
816 data->args.stable = NFS_UNSTABLE; 830 data->args.stable = NFS_UNSTABLE;
817 if (how & FLUSH_STABLE) { 831 if (how & FLUSH_STABLE) {
818 data->args.stable = NFS_DATA_SYNC; 832 data->args.stable = NFS_DATA_SYNC;
@@ -1036,9 +1050,9 @@ out:
1036void nfs_write_prepare(struct rpc_task *task, void *calldata) 1050void nfs_write_prepare(struct rpc_task *task, void *calldata)
1037{ 1051{
1038 struct nfs_write_data *data = calldata; 1052 struct nfs_write_data *data = calldata;
1039 struct nfs_client *clp = (NFS_SERVER(data->inode))->nfs_client;
1040 1053
1041 if (nfs4_setup_sequence(clp, &data->args.seq_args, 1054 if (nfs4_setup_sequence(NFS_SERVER(data->inode),
1055 &data->args.seq_args,
1042 &data->res.seq_res, 1, task)) 1056 &data->res.seq_res, 1, task))
1043 return; 1057 return;
1044 rpc_call_start(task); 1058 rpc_call_start(task);
@@ -1379,7 +1393,7 @@ static const struct rpc_call_ops nfs_commit_ops = {
1379 .rpc_release = nfs_commit_release, 1393 .rpc_release = nfs_commit_release,
1380}; 1394};
1381 1395
1382static int nfs_commit_inode(struct inode *inode, int how) 1396int nfs_commit_inode(struct inode *inode, int how)
1383{ 1397{
1384 LIST_HEAD(head); 1398 LIST_HEAD(head);
1385 int may_wait = how & FLUSH_SYNC; 1399 int may_wait = how & FLUSH_SYNC;
@@ -1443,11 +1457,6 @@ out_mark_dirty:
1443 return ret; 1457 return ret;
1444} 1458}
1445#else 1459#else
1446static int nfs_commit_inode(struct inode *inode, int how)
1447{
1448 return 0;
1449}
1450
1451static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc) 1460static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
1452{ 1461{
1453 return 0; 1462 return 0;
@@ -1546,7 +1555,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
1546 1555
1547 nfs_fscache_release_page(page, GFP_KERNEL); 1556 nfs_fscache_release_page(page, GFP_KERNEL);
1548 1557
1549 req = nfs_find_and_lock_request(page); 1558 req = nfs_find_and_lock_request(page, false);
1550 ret = PTR_ERR(req); 1559 ret = PTR_ERR(req);
1551 if (IS_ERR(req)) 1560 if (IS_ERR(req))
1552 goto out; 1561 goto out;
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 3d68f45a37b9..5b7e3021e06b 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -168,7 +168,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
168 svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); 168 svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
169 169
170 fh_copy(&resp->fh, &argp->fh); 170 fh_copy(&resp->fh, &argp->fh);
171 nfserr = nfsd_read(rqstp, &resp->fh, NULL, 171 nfserr = nfsd_read(rqstp, &resp->fh,
172 argp->offset, 172 argp->offset,
173 rqstp->rq_vec, argp->vlen, 173 rqstp->rq_vec, argp->vlen,
174 &resp->count); 174 &resp->count);
@@ -271,7 +271,7 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
271 fh_init(&resp->fh, NFS3_FHSIZE); 271 fh_init(&resp->fh, NFS3_FHSIZE);
272 nfserr = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, 272 nfserr = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len,
273 &argp->attrs, S_IFDIR, 0, &resp->fh); 273 &argp->attrs, S_IFDIR, 0, &resp->fh);
274 274 fh_unlock(&resp->dirfh);
275 RETURN_STATUS(nfserr); 275 RETURN_STATUS(nfserr);
276} 276}
277 277
@@ -327,7 +327,7 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp, struct nfsd3_mknodargs *argp,
327 type = nfs3_ftypes[argp->ftype]; 327 type = nfs3_ftypes[argp->ftype];
328 nfserr = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, 328 nfserr = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len,
329 &argp->attrs, type, rdev, &resp->fh); 329 &argp->attrs, type, rdev, &resp->fh);
330 330 fh_unlock(&resp->dirfh);
331 RETURN_STATUS(nfserr); 331 RETURN_STATUS(nfserr);
332} 332}
333 333
@@ -348,6 +348,7 @@ nfsd3_proc_remove(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
348 /* Unlink. -S_IFDIR means file must not be a directory */ 348 /* Unlink. -S_IFDIR means file must not be a directory */
349 fh_copy(&resp->fh, &argp->fh); 349 fh_copy(&resp->fh, &argp->fh);
350 nfserr = nfsd_unlink(rqstp, &resp->fh, -S_IFDIR, argp->name, argp->len); 350 nfserr = nfsd_unlink(rqstp, &resp->fh, -S_IFDIR, argp->name, argp->len);
351 fh_unlock(&resp->fh);
351 RETURN_STATUS(nfserr); 352 RETURN_STATUS(nfserr);
352} 353}
353 354
@@ -367,6 +368,7 @@ nfsd3_proc_rmdir(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
367 368
368 fh_copy(&resp->fh, &argp->fh); 369 fh_copy(&resp->fh, &argp->fh);
369 nfserr = nfsd_unlink(rqstp, &resp->fh, S_IFDIR, argp->name, argp->len); 370 nfserr = nfsd_unlink(rqstp, &resp->fh, S_IFDIR, argp->name, argp->len);
371 fh_unlock(&resp->fh);
370 RETURN_STATUS(nfserr); 372 RETURN_STATUS(nfserr);
371} 373}
372 374
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index eb78e7e22077..988cbb3a19b6 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -143,8 +143,6 @@ struct nfs4_cb_compound_hdr {
143 u32 minorversion; 143 u32 minorversion;
144 /* res */ 144 /* res */
145 int status; 145 int status;
146 u32 taglen;
147 char *tag;
148}; 146};
149 147
150static struct { 148static struct {
@@ -205,6 +203,16 @@ nfs_cb_stat_to_errno(int stat)
205 */ 203 */
206 204
207static void 205static void
206encode_stateid(struct xdr_stream *xdr, stateid_t *sid)
207{
208 __be32 *p;
209
210 RESERVE_SPACE(sizeof(stateid_t));
211 WRITE32(sid->si_generation);
212 WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t));
213}
214
215static void
208encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr) 216encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr)
209{ 217{
210 __be32 * p; 218 __be32 * p;
@@ -229,10 +237,10 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp,
229 __be32 *p; 237 __be32 *p;
230 int len = dp->dl_fh.fh_size; 238 int len = dp->dl_fh.fh_size;
231 239
232 RESERVE_SPACE(12+sizeof(dp->dl_stateid) + len); 240 RESERVE_SPACE(4);
233 WRITE32(OP_CB_RECALL); 241 WRITE32(OP_CB_RECALL);
234 WRITE32(dp->dl_stateid.si_generation); 242 encode_stateid(xdr, &dp->dl_stateid);
235 WRITEMEM(&dp->dl_stateid.si_opaque, sizeof(stateid_opaque_t)); 243 RESERVE_SPACE(8 + (XDR_QUADLEN(len) << 2));
236 WRITE32(0); /* truncate optimization not implemented */ 244 WRITE32(0); /* truncate optimization not implemented */
237 WRITE32(len); 245 WRITE32(len);
238 WRITEMEM(&dp->dl_fh.fh_base, len); 246 WRITEMEM(&dp->dl_fh.fh_base, len);
@@ -293,13 +301,14 @@ nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p,
293static int 301static int
294decode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr){ 302decode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr){
295 __be32 *p; 303 __be32 *p;
304 u32 taglen;
296 305
297 READ_BUF(8); 306 READ_BUF(8);
298 READ32(hdr->status); 307 READ32(hdr->status);
299 READ32(hdr->taglen); 308 /* We've got no use for the tag; ignore it: */
300 READ_BUF(hdr->taglen + 4); 309 READ32(taglen);
301 hdr->tag = (char *)p; 310 READ_BUF(taglen + 4);
302 p += XDR_QUADLEN(hdr->taglen); 311 p += XDR_QUADLEN(taglen);
303 READ32(hdr->nops); 312 READ32(hdr->nops);
304 return 0; 313 return 0;
305} 314}
@@ -667,28 +676,28 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
667 } 676 }
668 677
669 switch (task->tk_status) { 678 switch (task->tk_status) {
670 case -EIO: 679 case 0:
680 return;
681 case -EBADHANDLE:
682 case -NFS4ERR_BAD_STATEID:
683 /* Race: client probably got cb_recall
684 * before open reply granting delegation */
685 break;
686 default:
671 /* Network partition? */ 687 /* Network partition? */
672 atomic_set(&clp->cl_cb_set, 0); 688 atomic_set(&clp->cl_cb_set, 0);
673 warn_no_callback_path(clp, task->tk_status); 689 warn_no_callback_path(clp, task->tk_status);
674 if (current_rpc_client != task->tk_client) { 690 if (current_rpc_client != task->tk_client) {
675 /* queue a callback on the new connection: */ 691 /* queue a callback on the new connection: */
692 atomic_inc(&dp->dl_count);
676 nfsd4_cb_recall(dp); 693 nfsd4_cb_recall(dp);
677 return; 694 return;
678 } 695 }
679 case -EBADHANDLE:
680 case -NFS4ERR_BAD_STATEID:
681 /* Race: client probably got cb_recall
682 * before open reply granting delegation */
683 break;
684 default:
685 /* success, or error we can't handle */
686 return;
687 } 696 }
688 if (dp->dl_retries--) { 697 if (dp->dl_retries--) {
689 rpc_delay(task, 2*HZ); 698 rpc_delay(task, 2*HZ);
690 task->tk_status = 0; 699 task->tk_status = 0;
691 rpc_restart_call(task); 700 rpc_restart_call_prepare(task);
692 return; 701 return;
693 } else { 702 } else {
694 atomic_set(&clp->cl_cb_set, 0); 703 atomic_set(&clp->cl_cb_set, 0);
@@ -752,18 +761,16 @@ static void _nfsd4_cb_recall(struct nfs4_delegation *dp)
752 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL], 761 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
753 .rpc_cred = callback_cred 762 .rpc_cred = callback_cred
754 }; 763 };
755 int status;
756 764
757 if (clnt == NULL) 765 if (clnt == NULL) {
766 nfs4_put_delegation(dp);
758 return; /* Client is shutting down; give up. */ 767 return; /* Client is shutting down; give up. */
768 }
759 769
760 args->args_op = dp; 770 args->args_op = dp;
761 msg.rpc_argp = args; 771 msg.rpc_argp = args;
762 dp->dl_retries = 1; 772 dp->dl_retries = 1;
763 status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT, 773 rpc_call_async(clnt, &msg, RPC_TASK_SOFT, &nfsd4_cb_recall_ops, dp);
764 &nfsd4_cb_recall_ops, dp);
765 if (status)
766 nfs4_put_delegation(dp);
767} 774}
768 775
769void nfsd4_do_callback_rpc(struct work_struct *w) 776void nfsd4_do_callback_rpc(struct work_struct *w)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 4a2734758778..2e7357104cfd 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -51,7 +51,6 @@ static time_t boot_time;
51static u32 current_ownerid = 1; 51static u32 current_ownerid = 1;
52static u32 current_fileid = 1; 52static u32 current_fileid = 1;
53static u32 current_delegid = 1; 53static u32 current_delegid = 1;
54static u32 nfs4_init;
55static stateid_t zerostateid; /* bits all 0 */ 54static stateid_t zerostateid; /* bits all 0 */
56static stateid_t onestateid; /* bits all 1 */ 55static stateid_t onestateid; /* bits all 1 */
57static u64 current_sessionid = 1; 56static u64 current_sessionid = 1;
@@ -163,6 +162,46 @@ static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE];
163static struct list_head file_hashtbl[FILE_HASH_SIZE]; 162static struct list_head file_hashtbl[FILE_HASH_SIZE];
164static struct list_head stateid_hashtbl[STATEID_HASH_SIZE]; 163static struct list_head stateid_hashtbl[STATEID_HASH_SIZE];
165 164
165static void __nfs4_file_get_access(struct nfs4_file *fp, int oflag)
166{
167 BUG_ON(!(fp->fi_fds[oflag] || fp->fi_fds[O_RDWR]));
168 atomic_inc(&fp->fi_access[oflag]);
169}
170
171static void nfs4_file_get_access(struct nfs4_file *fp, int oflag)
172{
173 if (oflag == O_RDWR) {
174 __nfs4_file_get_access(fp, O_RDONLY);
175 __nfs4_file_get_access(fp, O_WRONLY);
176 } else
177 __nfs4_file_get_access(fp, oflag);
178}
179
180static void nfs4_file_put_fd(struct nfs4_file *fp, int oflag)
181{
182 if (fp->fi_fds[oflag]) {
183 fput(fp->fi_fds[oflag]);
184 fp->fi_fds[oflag] = NULL;
185 }
186}
187
188static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag)
189{
190 if (atomic_dec_and_test(&fp->fi_access[oflag])) {
191 nfs4_file_put_fd(fp, O_RDWR);
192 nfs4_file_put_fd(fp, oflag);
193 }
194}
195
196static void nfs4_file_put_access(struct nfs4_file *fp, int oflag)
197{
198 if (oflag == O_RDWR) {
199 __nfs4_file_put_access(fp, O_RDONLY);
200 __nfs4_file_put_access(fp, O_WRONLY);
201 } else
202 __nfs4_file_put_access(fp, oflag);
203}
204
166static struct nfs4_delegation * 205static struct nfs4_delegation *
167alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type) 206alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type)
168{ 207{
@@ -171,6 +210,13 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
171 struct nfs4_cb_conn *cb = &stp->st_stateowner->so_client->cl_cb_conn; 210 struct nfs4_cb_conn *cb = &stp->st_stateowner->so_client->cl_cb_conn;
172 211
173 dprintk("NFSD alloc_init_deleg\n"); 212 dprintk("NFSD alloc_init_deleg\n");
213 /*
214 * Major work on the lease subsystem (for example, to support
215 * callbacks on stat) will be required before we can support
216 * write delegations properly.
217 */
218 if (type != NFS4_OPEN_DELEGATE_READ)
219 return NULL;
174 if (fp->fi_had_conflict) 220 if (fp->fi_had_conflict)
175 return NULL; 221 return NULL;
176 if (num_delegations > max_delegations) 222 if (num_delegations > max_delegations)
@@ -185,9 +231,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
185 dp->dl_client = clp; 231 dp->dl_client = clp;
186 get_nfs4_file(fp); 232 get_nfs4_file(fp);
187 dp->dl_file = fp; 233 dp->dl_file = fp;
234 nfs4_file_get_access(fp, O_RDONLY);
188 dp->dl_flock = NULL; 235 dp->dl_flock = NULL;
189 get_file(stp->st_vfs_file);
190 dp->dl_vfs_file = stp->st_vfs_file;
191 dp->dl_type = type; 236 dp->dl_type = type;
192 dp->dl_ident = cb->cb_ident; 237 dp->dl_ident = cb->cb_ident;
193 dp->dl_stateid.si_boot = boot_time; 238 dp->dl_stateid.si_boot = boot_time;
@@ -222,15 +267,12 @@ nfs4_put_delegation(struct nfs4_delegation *dp)
222static void 267static void
223nfs4_close_delegation(struct nfs4_delegation *dp) 268nfs4_close_delegation(struct nfs4_delegation *dp)
224{ 269{
225 struct file *filp = dp->dl_vfs_file; 270 struct file *filp = find_readable_file(dp->dl_file);
226 271
227 dprintk("NFSD: close_delegation dp %p\n",dp); 272 dprintk("NFSD: close_delegation dp %p\n",dp);
228 dp->dl_vfs_file = NULL;
229 /* The following nfsd_close may not actually close the file,
230 * but we want to remove the lease in any case. */
231 if (dp->dl_flock) 273 if (dp->dl_flock)
232 vfs_setlease(filp, F_UNLCK, &dp->dl_flock); 274 vfs_setlease(filp, F_UNLCK, &dp->dl_flock);
233 nfsd_close(filp); 275 nfs4_file_put_access(dp->dl_file, O_RDONLY);
234} 276}
235 277
236/* Called under the state lock. */ 278/* Called under the state lock. */
@@ -302,8 +344,12 @@ static void free_generic_stateid(struct nfs4_stateid *stp)
302 344
303static void release_lock_stateid(struct nfs4_stateid *stp) 345static void release_lock_stateid(struct nfs4_stateid *stp)
304{ 346{
347 struct file *file;
348
305 unhash_generic_stateid(stp); 349 unhash_generic_stateid(stp);
306 locks_remove_posix(stp->st_vfs_file, (fl_owner_t)stp->st_stateowner); 350 file = find_any_file(stp->st_file);
351 if (file)
352 locks_remove_posix(file, (fl_owner_t)stp->st_stateowner);
307 free_generic_stateid(stp); 353 free_generic_stateid(stp);
308} 354}
309 355
@@ -341,11 +387,85 @@ release_stateid_lockowners(struct nfs4_stateid *open_stp)
341 } 387 }
342} 388}
343 389
390/*
391 * We store the NONE, READ, WRITE, and BOTH bits separately in the
392 * st_{access,deny}_bmap field of the stateid, in order to track not
393 * only what share bits are currently in force, but also what
394 * combinations of share bits previous opens have used. This allows us
395 * to enforce the recommendation of rfc 3530 14.2.19 that the server
396 * return an error if the client attempts to downgrade to a combination
397 * of share bits not explicable by closing some of its previous opens.
398 *
399 * XXX: This enforcement is actually incomplete, since we don't keep
400 * track of access/deny bit combinations; so, e.g., we allow:
401 *
402 * OPEN allow read, deny write
403 * OPEN allow both, deny none
404 * DOWNGRADE allow read, deny none
405 *
406 * which we should reject.
407 */
408static void
409set_access(unsigned int *access, unsigned long bmap) {
410 int i;
411
412 *access = 0;
413 for (i = 1; i < 4; i++) {
414 if (test_bit(i, &bmap))
415 *access |= i;
416 }
417}
418
419static void
420set_deny(unsigned int *deny, unsigned long bmap) {
421 int i;
422
423 *deny = 0;
424 for (i = 0; i < 4; i++) {
425 if (test_bit(i, &bmap))
426 *deny |= i ;
427 }
428}
429
430static int
431test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) {
432 unsigned int access, deny;
433
434 set_access(&access, stp->st_access_bmap);
435 set_deny(&deny, stp->st_deny_bmap);
436 if ((access & open->op_share_deny) || (deny & open->op_share_access))
437 return 0;
438 return 1;
439}
440
441static int nfs4_access_to_omode(u32 access)
442{
443 switch (access) {
444 case NFS4_SHARE_ACCESS_READ:
445 return O_RDONLY;
446 case NFS4_SHARE_ACCESS_WRITE:
447 return O_WRONLY;
448 case NFS4_SHARE_ACCESS_BOTH:
449 return O_RDWR;
450 }
451 BUG();
452}
453
454static int nfs4_access_bmap_to_omode(struct nfs4_stateid *stp)
455{
456 unsigned int access;
457
458 set_access(&access, stp->st_access_bmap);
459 return nfs4_access_to_omode(access);
460}
461
344static void release_open_stateid(struct nfs4_stateid *stp) 462static void release_open_stateid(struct nfs4_stateid *stp)
345{ 463{
464 int oflag = nfs4_access_bmap_to_omode(stp);
465
346 unhash_generic_stateid(stp); 466 unhash_generic_stateid(stp);
347 release_stateid_lockowners(stp); 467 release_stateid_lockowners(stp);
348 nfsd_close(stp->st_vfs_file); 468 nfs4_file_put_access(stp->st_file, oflag);
349 free_generic_stateid(stp); 469 free_generic_stateid(stp);
350} 470}
351 471
@@ -457,7 +577,7 @@ static int set_forechannel_drc_size(struct nfsd4_channel_attrs *fchan)
457 spin_unlock(&nfsd_drc_lock); 577 spin_unlock(&nfsd_drc_lock);
458 578
459 if (fchan->maxreqs == 0) 579 if (fchan->maxreqs == 0)
460 return nfserr_serverfault; 580 return nfserr_jukebox;
461 581
462 fchan->maxresp_cached = size + NFSD_MIN_HDR_SEQ_SZ; 582 fchan->maxresp_cached = size + NFSD_MIN_HDR_SEQ_SZ;
463 return 0; 583 return 0;
@@ -542,7 +662,7 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
542 BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot) 662 BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot)
543 + sizeof(struct nfsd4_session) > PAGE_SIZE); 663 + sizeof(struct nfsd4_session) > PAGE_SIZE);
544 664
545 status = nfserr_serverfault; 665 status = nfserr_jukebox;
546 /* allocate struct nfsd4_session and slot table pointers in one piece */ 666 /* allocate struct nfsd4_session and slot table pointers in one piece */
547 slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot *); 667 slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot *);
548 new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL); 668 new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL);
@@ -591,10 +711,8 @@ find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid)
591 711
592 dump_sessionid(__func__, sessionid); 712 dump_sessionid(__func__, sessionid);
593 idx = hash_sessionid(sessionid); 713 idx = hash_sessionid(sessionid);
594 dprintk("%s: idx is %d\n", __func__, idx);
595 /* Search in the appropriate list */ 714 /* Search in the appropriate list */
596 list_for_each_entry(elem, &sessionid_hashtbl[idx], se_hash) { 715 list_for_each_entry(elem, &sessionid_hashtbl[idx], se_hash) {
597 dump_sessionid("list traversal", &elem->se_sessionid);
598 if (!memcmp(elem->se_sessionid.data, sessionid->data, 716 if (!memcmp(elem->se_sessionid.data, sessionid->data,
599 NFS4_MAX_SESSIONID_LEN)) { 717 NFS4_MAX_SESSIONID_LEN)) {
600 return elem; 718 return elem;
@@ -714,7 +832,6 @@ release_session_client(struct nfsd4_session *session)
714 } else 832 } else
715 renew_client_locked(clp); 833 renew_client_locked(clp);
716 spin_unlock(&client_lock); 834 spin_unlock(&client_lock);
717 nfsd4_put_session(session);
718} 835}
719 836
720/* must be called under the client_lock */ 837/* must be called under the client_lock */
@@ -1220,7 +1337,7 @@ out_new:
1220 /* Normal case */ 1337 /* Normal case */
1221 new = create_client(exid->clname, dname, rqstp, &verf); 1338 new = create_client(exid->clname, dname, rqstp, &verf);
1222 if (new == NULL) { 1339 if (new == NULL) {
1223 status = nfserr_serverfault; 1340 status = nfserr_jukebox;
1224 goto out; 1341 goto out;
1225 } 1342 }
1226 1343
@@ -1760,6 +1877,8 @@ alloc_init_file(struct inode *ino)
1760 fp->fi_inode = igrab(ino); 1877 fp->fi_inode = igrab(ino);
1761 fp->fi_id = current_fileid++; 1878 fp->fi_id = current_fileid++;
1762 fp->fi_had_conflict = false; 1879 fp->fi_had_conflict = false;
1880 memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
1881 memset(fp->fi_access, 0, sizeof(fp->fi_access));
1763 spin_lock(&recall_lock); 1882 spin_lock(&recall_lock);
1764 list_add(&fp->fi_hash, &file_hashtbl[hashval]); 1883 list_add(&fp->fi_hash, &file_hashtbl[hashval]);
1765 spin_unlock(&recall_lock); 1884 spin_unlock(&recall_lock);
@@ -1971,57 +2090,6 @@ static inline int deny_valid(u32 x)
1971} 2090}
1972 2091
1973/* 2092/*
1974 * We store the NONE, READ, WRITE, and BOTH bits separately in the
1975 * st_{access,deny}_bmap field of the stateid, in order to track not
1976 * only what share bits are currently in force, but also what
1977 * combinations of share bits previous opens have used. This allows us
1978 * to enforce the recommendation of rfc 3530 14.2.19 that the server
1979 * return an error if the client attempt to downgrade to a combination
1980 * of share bits not explicable by closing some of its previous opens.
1981 *
1982 * XXX: This enforcement is actually incomplete, since we don't keep
1983 * track of access/deny bit combinations; so, e.g., we allow:
1984 *
1985 * OPEN allow read, deny write
1986 * OPEN allow both, deny none
1987 * DOWNGRADE allow read, deny none
1988 *
1989 * which we should reject.
1990 */
1991static void
1992set_access(unsigned int *access, unsigned long bmap) {
1993 int i;
1994
1995 *access = 0;
1996 for (i = 1; i < 4; i++) {
1997 if (test_bit(i, &bmap))
1998 *access |= i;
1999 }
2000}
2001
2002static void
2003set_deny(unsigned int *deny, unsigned long bmap) {
2004 int i;
2005
2006 *deny = 0;
2007 for (i = 0; i < 4; i++) {
2008 if (test_bit(i, &bmap))
2009 *deny |= i ;
2010 }
2011}
2012
2013static int
2014test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) {
2015 unsigned int access, deny;
2016
2017 set_access(&access, stp->st_access_bmap);
2018 set_deny(&deny, stp->st_deny_bmap);
2019 if ((access & open->op_share_deny) || (deny & open->op_share_access))
2020 return 0;
2021 return 1;
2022}
2023
2024/*
2025 * Called to check deny when READ with all zero stateid or 2093 * Called to check deny when READ with all zero stateid or
2026 * WRITE with all zero or all one stateid 2094 * WRITE with all zero or all one stateid
2027 */ 2095 */
@@ -2052,14 +2120,12 @@ out:
2052} 2120}
2053 2121
2054static inline void 2122static inline void
2055nfs4_file_downgrade(struct file *filp, unsigned int share_access) 2123nfs4_file_downgrade(struct nfs4_file *fp, unsigned int share_access)
2056{ 2124{
2057 if (share_access & NFS4_SHARE_ACCESS_WRITE) { 2125 if (share_access & NFS4_SHARE_ACCESS_WRITE)
2058 drop_file_write_access(filp); 2126 nfs4_file_put_access(fp, O_WRONLY);
2059 spin_lock(&filp->f_lock); 2127 if (share_access & NFS4_SHARE_ACCESS_READ)
2060 filp->f_mode = (filp->f_mode | FMODE_READ) & ~FMODE_WRITE; 2128 nfs4_file_put_access(fp, O_RDONLY);
2061 spin_unlock(&filp->f_lock);
2062 }
2063} 2129}
2064 2130
2065/* 2131/*
@@ -2255,6 +2321,13 @@ find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
2255 return NULL; 2321 return NULL;
2256} 2322}
2257 2323
2324int share_access_to_flags(u32 share_access)
2325{
2326 share_access &= ~NFS4_SHARE_WANT_MASK;
2327
2328 return share_access == NFS4_SHARE_ACCESS_READ ? RD_STATE : WR_STATE;
2329}
2330
2258static __be32 2331static __be32
2259nfs4_check_deleg(struct nfs4_file *fp, struct nfsd4_open *open, 2332nfs4_check_deleg(struct nfs4_file *fp, struct nfsd4_open *open,
2260 struct nfs4_delegation **dp) 2333 struct nfs4_delegation **dp)
@@ -2265,8 +2338,7 @@ nfs4_check_deleg(struct nfs4_file *fp, struct nfsd4_open *open,
2265 *dp = find_delegation_file(fp, &open->op_delegate_stateid); 2338 *dp = find_delegation_file(fp, &open->op_delegate_stateid);
2266 if (*dp == NULL) 2339 if (*dp == NULL)
2267 goto out; 2340 goto out;
2268 flags = open->op_share_access == NFS4_SHARE_ACCESS_READ ? 2341 flags = share_access_to_flags(open->op_share_access);
2269 RD_STATE : WR_STATE;
2270 status = nfs4_check_delegmode(*dp, flags); 2342 status = nfs4_check_delegmode(*dp, flags);
2271 if (status) 2343 if (status)
2272 *dp = NULL; 2344 *dp = NULL;
@@ -2308,30 +2380,53 @@ nfs4_alloc_stateid(void)
2308 return kmem_cache_alloc(stateid_slab, GFP_KERNEL); 2380 return kmem_cache_alloc(stateid_slab, GFP_KERNEL);
2309} 2381}
2310 2382
2383static inline int nfs4_access_to_access(u32 nfs4_access)
2384{
2385 int flags = 0;
2386
2387 if (nfs4_access & NFS4_SHARE_ACCESS_READ)
2388 flags |= NFSD_MAY_READ;
2389 if (nfs4_access & NFS4_SHARE_ACCESS_WRITE)
2390 flags |= NFSD_MAY_WRITE;
2391 return flags;
2392}
2393
2394static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file
2395*fp, struct svc_fh *cur_fh, u32 nfs4_access)
2396{
2397 __be32 status;
2398 int oflag = nfs4_access_to_omode(nfs4_access);
2399 int access = nfs4_access_to_access(nfs4_access);
2400
2401 if (!fp->fi_fds[oflag]) {
2402 status = nfsd_open(rqstp, cur_fh, S_IFREG, access,
2403 &fp->fi_fds[oflag]);
2404 if (status == nfserr_dropit)
2405 status = nfserr_jukebox;
2406 if (status)
2407 return status;
2408 }
2409 nfs4_file_get_access(fp, oflag);
2410
2411 return nfs_ok;
2412}
2413
2311static __be32 2414static __be32
2312nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp, 2415nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp,
2313 struct nfs4_delegation *dp, 2416 struct nfs4_file *fp, struct svc_fh *cur_fh,
2314 struct svc_fh *cur_fh, int flags) 2417 struct nfsd4_open *open)
2315{ 2418{
2316 struct nfs4_stateid *stp; 2419 struct nfs4_stateid *stp;
2420 __be32 status;
2317 2421
2318 stp = nfs4_alloc_stateid(); 2422 stp = nfs4_alloc_stateid();
2319 if (stp == NULL) 2423 if (stp == NULL)
2320 return nfserr_resource; 2424 return nfserr_resource;
2321 2425
2322 if (dp) { 2426 status = nfs4_get_vfs_file(rqstp, fp, cur_fh, open->op_share_access);
2323 get_file(dp->dl_vfs_file); 2427 if (status) {
2324 stp->st_vfs_file = dp->dl_vfs_file; 2428 kmem_cache_free(stateid_slab, stp);
2325 } else { 2429 return status;
2326 __be32 status;
2327 status = nfsd_open(rqstp, cur_fh, S_IFREG, flags,
2328 &stp->st_vfs_file);
2329 if (status) {
2330 if (status == nfserr_dropit)
2331 status = nfserr_jukebox;
2332 kmem_cache_free(stateid_slab, stp);
2333 return status;
2334 }
2335 } 2430 }
2336 *stpp = stp; 2431 *stpp = stp;
2337 return 0; 2432 return 0;
@@ -2353,35 +2448,30 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
2353} 2448}
2354 2449
2355static __be32 2450static __be32
2356nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_stateid *stp, struct nfsd4_open *open) 2451nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_stateid *stp, struct nfsd4_open *open)
2357{ 2452{
2358 struct file *filp = stp->st_vfs_file; 2453 u32 op_share_access, new_access;
2359 struct inode *inode = filp->f_path.dentry->d_inode;
2360 unsigned int share_access, new_writer;
2361 __be32 status; 2454 __be32 status;
2362 2455
2363 set_access(&share_access, stp->st_access_bmap); 2456 set_access(&new_access, stp->st_access_bmap);
2364 new_writer = (~share_access) & open->op_share_access 2457 new_access = (~new_access) & open->op_share_access & ~NFS4_SHARE_WANT_MASK;
2365 & NFS4_SHARE_ACCESS_WRITE; 2458
2366 2459 if (new_access) {
2367 if (new_writer) { 2460 status = nfs4_get_vfs_file(rqstp, fp, cur_fh, new_access);
2368 int err = get_write_access(inode); 2461 if (status)
2369 if (err) 2462 return status;
2370 return nfserrno(err);
2371 err = mnt_want_write(cur_fh->fh_export->ex_path.mnt);
2372 if (err)
2373 return nfserrno(err);
2374 file_take_write(filp);
2375 } 2463 }
2376 status = nfsd4_truncate(rqstp, cur_fh, open); 2464 status = nfsd4_truncate(rqstp, cur_fh, open);
2377 if (status) { 2465 if (status) {
2378 if (new_writer) 2466 if (new_access) {
2379 put_write_access(inode); 2467 int oflag = nfs4_access_to_omode(new_access);
2468 nfs4_file_put_access(fp, oflag);
2469 }
2380 return status; 2470 return status;
2381 } 2471 }
2382 /* remember the open */ 2472 /* remember the open */
2383 filp->f_mode |= open->op_share_access; 2473 op_share_access = open->op_share_access & ~NFS4_SHARE_WANT_MASK;
2384 __set_bit(open->op_share_access, &stp->st_access_bmap); 2474 __set_bit(op_share_access, &stp->st_access_bmap);
2385 __set_bit(open->op_share_deny, &stp->st_deny_bmap); 2475 __set_bit(open->op_share_deny, &stp->st_deny_bmap);
2386 2476
2387 return nfs_ok; 2477 return nfs_ok;
@@ -2444,13 +2534,14 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
2444 fl.fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; 2534 fl.fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
2445 fl.fl_end = OFFSET_MAX; 2535 fl.fl_end = OFFSET_MAX;
2446 fl.fl_owner = (fl_owner_t)dp; 2536 fl.fl_owner = (fl_owner_t)dp;
2447 fl.fl_file = stp->st_vfs_file; 2537 fl.fl_file = find_readable_file(stp->st_file);
2538 BUG_ON(!fl.fl_file);
2448 fl.fl_pid = current->tgid; 2539 fl.fl_pid = current->tgid;
2449 2540
2450 /* vfs_setlease checks to see if delegation should be handed out. 2541 /* vfs_setlease checks to see if delegation should be handed out.
2451 * the lock_manager callbacks fl_mylease and fl_change are used 2542 * the lock_manager callbacks fl_mylease and fl_change are used
2452 */ 2543 */
2453 if ((status = vfs_setlease(stp->st_vfs_file, fl.fl_type, &flp))) { 2544 if ((status = vfs_setlease(fl.fl_file, fl.fl_type, &flp))) {
2454 dprintk("NFSD: setlease failed [%d], no delegation\n", status); 2545 dprintk("NFSD: setlease failed [%d], no delegation\n", status);
2455 unhash_delegation(dp); 2546 unhash_delegation(dp);
2456 flag = NFS4_OPEN_DELEGATE_NONE; 2547 flag = NFS4_OPEN_DELEGATE_NONE;
@@ -2514,18 +2605,12 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
2514 */ 2605 */
2515 if (stp) { 2606 if (stp) {
2516 /* Stateid was found, this is an OPEN upgrade */ 2607 /* Stateid was found, this is an OPEN upgrade */
2517 status = nfs4_upgrade_open(rqstp, current_fh, stp, open); 2608 status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
2518 if (status) 2609 if (status)
2519 goto out; 2610 goto out;
2520 update_stateid(&stp->st_stateid); 2611 update_stateid(&stp->st_stateid);
2521 } else { 2612 } else {
2522 /* Stateid was not found, this is a new OPEN */ 2613 status = nfs4_new_open(rqstp, &stp, fp, current_fh, open);
2523 int flags = 0;
2524 if (open->op_share_access & NFS4_SHARE_ACCESS_READ)
2525 flags |= NFSD_MAY_READ;
2526 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
2527 flags |= NFSD_MAY_WRITE;
2528 status = nfs4_new_open(rqstp, &stp, dp, current_fh, flags);
2529 if (status) 2614 if (status)
2530 goto out; 2615 goto out;
2531 init_stateid(stp, fp, open); 2616 init_stateid(stp, fp, open);
@@ -2727,7 +2812,7 @@ search_close_lru(u32 st_id, int flags)
2727static inline int 2812static inline int
2728nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp) 2813nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp)
2729{ 2814{
2730 return fhp->fh_dentry->d_inode != stp->st_vfs_file->f_path.dentry->d_inode; 2815 return fhp->fh_dentry->d_inode != stp->st_file->fi_inode;
2731} 2816}
2732 2817
2733static int 2818static int
@@ -2760,6 +2845,9 @@ __be32 nfs4_check_openmode(struct nfs4_stateid *stp, int flags)
2760{ 2845{
2761 __be32 status = nfserr_openmode; 2846 __be32 status = nfserr_openmode;
2762 2847
2848 /* For lock stateid's, we test the parent open, not the lock: */
2849 if (stp->st_openstp)
2850 stp = stp->st_openstp;
2763 if ((flags & WR_STATE) && (!access_permit_write(stp->st_access_bmap))) 2851 if ((flags & WR_STATE) && (!access_permit_write(stp->st_access_bmap)))
2764 goto out; 2852 goto out;
2765 if ((flags & RD_STATE) && (!access_permit_read(stp->st_access_bmap))) 2853 if ((flags & RD_STATE) && (!access_permit_read(stp->st_access_bmap)))
@@ -2872,7 +2960,8 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
2872 goto out; 2960 goto out;
2873 renew_client(dp->dl_client); 2961 renew_client(dp->dl_client);
2874 if (filpp) 2962 if (filpp)
2875 *filpp = dp->dl_vfs_file; 2963 *filpp = find_readable_file(dp->dl_file);
2964 BUG_ON(!*filpp);
2876 } else { /* open or lock stateid */ 2965 } else { /* open or lock stateid */
2877 stp = find_stateid(stateid, flags); 2966 stp = find_stateid(stateid, flags);
2878 if (!stp) 2967 if (!stp)
@@ -2889,8 +2978,13 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
2889 if (status) 2978 if (status)
2890 goto out; 2979 goto out;
2891 renew_client(stp->st_stateowner->so_client); 2980 renew_client(stp->st_stateowner->so_client);
2892 if (filpp) 2981 if (filpp) {
2893 *filpp = stp->st_vfs_file; 2982 if (flags & RD_STATE)
2983 *filpp = find_readable_file(stp->st_file);
2984 else
2985 *filpp = find_writeable_file(stp->st_file);
2986 BUG_ON(!*filpp); /* assured by check_openmode */
2987 }
2894 } 2988 }
2895 status = nfs_ok; 2989 status = nfs_ok;
2896out: 2990out:
@@ -3126,8 +3220,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
3126 goto out; 3220 goto out;
3127 } 3221 }
3128 set_access(&share_access, stp->st_access_bmap); 3222 set_access(&share_access, stp->st_access_bmap);
3129 nfs4_file_downgrade(stp->st_vfs_file, 3223 nfs4_file_downgrade(stp->st_file, share_access & ~od->od_share_access);
3130 share_access & ~od->od_share_access);
3131 3224
3132 reset_union_bmap_access(od->od_share_access, &stp->st_access_bmap); 3225 reset_union_bmap_access(od->od_share_access, &stp->st_access_bmap);
3133 reset_union_bmap_deny(od->od_share_deny, &stp->st_deny_bmap); 3226 reset_union_bmap_deny(od->od_share_deny, &stp->st_deny_bmap);
@@ -3346,11 +3439,9 @@ static inline void
3346nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny) 3439nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
3347{ 3440{
3348 struct nfs4_stateowner *sop; 3441 struct nfs4_stateowner *sop;
3349 unsigned int hval;
3350 3442
3351 if (fl->fl_lmops == &nfsd_posix_mng_ops) { 3443 if (fl->fl_lmops == &nfsd_posix_mng_ops) {
3352 sop = (struct nfs4_stateowner *) fl->fl_owner; 3444 sop = (struct nfs4_stateowner *) fl->fl_owner;
3353 hval = lockownerid_hashval(sop->so_id);
3354 kref_get(&sop->so_ref); 3445 kref_get(&sop->so_ref);
3355 deny->ld_sop = sop; 3446 deny->ld_sop = sop;
3356 deny->ld_clientid = sop->so_client->cl_clientid; 3447 deny->ld_clientid = sop->so_client->cl_clientid;
@@ -3446,8 +3537,6 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc
3446 stp->st_stateid.si_stateownerid = sop->so_id; 3537 stp->st_stateid.si_stateownerid = sop->so_id;
3447 stp->st_stateid.si_fileid = fp->fi_id; 3538 stp->st_stateid.si_fileid = fp->fi_id;
3448 stp->st_stateid.si_generation = 0; 3539 stp->st_stateid.si_generation = 0;
3449 stp->st_vfs_file = open_stp->st_vfs_file; /* FIXME refcount?? */
3450 stp->st_access_bmap = open_stp->st_access_bmap;
3451 stp->st_deny_bmap = open_stp->st_deny_bmap; 3540 stp->st_deny_bmap = open_stp->st_deny_bmap;
3452 stp->st_openstp = open_stp; 3541 stp->st_openstp = open_stp;
3453 3542
@@ -3547,7 +3636,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3547 lock_sop = lock->lk_replay_owner; 3636 lock_sop = lock->lk_replay_owner;
3548 } 3637 }
3549 /* lock->lk_replay_owner and lock_stp have been created or found */ 3638 /* lock->lk_replay_owner and lock_stp have been created or found */
3550 filp = lock_stp->st_vfs_file;
3551 3639
3552 status = nfserr_grace; 3640 status = nfserr_grace;
3553 if (locks_in_grace() && !lock->lk_reclaim) 3641 if (locks_in_grace() && !lock->lk_reclaim)
@@ -3560,11 +3648,13 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3560 switch (lock->lk_type) { 3648 switch (lock->lk_type) {
3561 case NFS4_READ_LT: 3649 case NFS4_READ_LT:
3562 case NFS4_READW_LT: 3650 case NFS4_READW_LT:
3651 filp = find_readable_file(lock_stp->st_file);
3563 file_lock.fl_type = F_RDLCK; 3652 file_lock.fl_type = F_RDLCK;
3564 cmd = F_SETLK; 3653 cmd = F_SETLK;
3565 break; 3654 break;
3566 case NFS4_WRITE_LT: 3655 case NFS4_WRITE_LT:
3567 case NFS4_WRITEW_LT: 3656 case NFS4_WRITEW_LT:
3657 filp = find_writeable_file(lock_stp->st_file);
3568 file_lock.fl_type = F_WRLCK; 3658 file_lock.fl_type = F_WRLCK;
3569 cmd = F_SETLK; 3659 cmd = F_SETLK;
3570 break; 3660 break;
@@ -3572,6 +3662,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3572 status = nfserr_inval; 3662 status = nfserr_inval;
3573 goto out; 3663 goto out;
3574 } 3664 }
3665 if (!filp) {
3666 status = nfserr_openmode;
3667 goto out;
3668 }
3575 file_lock.fl_owner = (fl_owner_t)lock_sop; 3669 file_lock.fl_owner = (fl_owner_t)lock_sop;
3576 file_lock.fl_pid = current->tgid; 3670 file_lock.fl_pid = current->tgid;
3577 file_lock.fl_file = filp; 3671 file_lock.fl_file = filp;
@@ -3740,7 +3834,11 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3740 &locku->lu_stateowner, &stp, NULL))) 3834 &locku->lu_stateowner, &stp, NULL)))
3741 goto out; 3835 goto out;
3742 3836
3743 filp = stp->st_vfs_file; 3837 filp = find_any_file(stp->st_file);
3838 if (!filp) {
3839 status = nfserr_lock_range;
3840 goto out;
3841 }
3744 BUG_ON(!filp); 3842 BUG_ON(!filp);
3745 locks_init_lock(&file_lock); 3843 locks_init_lock(&file_lock);
3746 file_lock.fl_type = F_UNLCK; 3844 file_lock.fl_type = F_UNLCK;
@@ -3787,10 +3885,10 @@ out_nfserr:
3787 * 0: no locks held by lockowner 3885 * 0: no locks held by lockowner
3788 */ 3886 */
3789static int 3887static int
3790check_for_locks(struct file *filp, struct nfs4_stateowner *lowner) 3888check_for_locks(struct nfs4_file *filp, struct nfs4_stateowner *lowner)
3791{ 3889{
3792 struct file_lock **flpp; 3890 struct file_lock **flpp;
3793 struct inode *inode = filp->f_path.dentry->d_inode; 3891 struct inode *inode = filp->fi_inode;
3794 int status = 0; 3892 int status = 0;
3795 3893
3796 lock_kernel(); 3894 lock_kernel();
@@ -3841,7 +3939,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
3841 continue; 3939 continue;
3842 list_for_each_entry(stp, &sop->so_stateids, 3940 list_for_each_entry(stp, &sop->so_stateids,
3843 st_perstateowner) { 3941 st_perstateowner) {
3844 if (check_for_locks(stp->st_vfs_file, sop)) 3942 if (check_for_locks(stp->st_file, sop))
3845 goto out; 3943 goto out;
3846 /* Note: so_perclient unused for lockowners, 3944 /* Note: so_perclient unused for lockowners,
3847 * so it's OK to fool with here. */ 3945 * so it's OK to fool with here. */
@@ -4066,16 +4164,8 @@ out_free_laundry:
4066int 4164int
4067nfs4_state_start(void) 4165nfs4_state_start(void)
4068{ 4166{
4069 int ret;
4070
4071 if (nfs4_init)
4072 return 0;
4073 nfsd4_load_reboot_recovery_data(); 4167 nfsd4_load_reboot_recovery_data();
4074 ret = __nfs4_state_start(); 4168 return __nfs4_state_start();
4075 if (ret)
4076 return ret;
4077 nfs4_init = 1;
4078 return 0;
4079} 4169}
4080 4170
4081static void 4171static void
@@ -4110,7 +4200,6 @@ __nfs4_state_shutdown(void)
4110 } 4200 }
4111 4201
4112 nfsd4_shutdown_recdir(); 4202 nfsd4_shutdown_recdir();
4113 nfs4_init = 0;
4114} 4203}
4115 4204
4116void 4205void
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index ac17a7080239..1a468bbd330f 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1756,6 +1756,10 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1756 struct nfs4_acl *acl = NULL; 1756 struct nfs4_acl *acl = NULL;
1757 struct nfsd4_compoundres *resp = rqstp->rq_resp; 1757 struct nfsd4_compoundres *resp = rqstp->rq_resp;
1758 u32 minorversion = resp->cstate.minorversion; 1758 u32 minorversion = resp->cstate.minorversion;
1759 struct path path = {
1760 .mnt = exp->ex_path.mnt,
1761 .dentry = dentry,
1762 };
1759 1763
1760 BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1); 1764 BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1);
1761 BUG_ON(bmval0 & ~nfsd_suppattrs0(minorversion)); 1765 BUG_ON(bmval0 & ~nfsd_suppattrs0(minorversion));
@@ -1776,7 +1780,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1776 FATTR4_WORD0_MAXNAME)) || 1780 FATTR4_WORD0_MAXNAME)) ||
1777 (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | 1781 (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
1778 FATTR4_WORD1_SPACE_TOTAL))) { 1782 FATTR4_WORD1_SPACE_TOTAL))) {
1779 err = vfs_statfs(dentry, &statfs); 1783 err = vfs_statfs(&path, &statfs);
1780 if (err) 1784 if (err)
1781 goto out_nfserr; 1785 goto out_nfserr;
1782 } 1786 }
@@ -2630,7 +2634,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
2630 } 2634 }
2631 read->rd_vlen = v; 2635 read->rd_vlen = v;
2632 2636
2633 nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, read->rd_filp, 2637 nfserr = nfsd_read_file(read->rd_rqstp, read->rd_fhp, read->rd_filp,
2634 read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen, 2638 read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen,
2635 &maxcount); 2639 &maxcount);
2636 2640
@@ -3325,6 +3329,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
3325 } 3329 }
3326 /* Renew the clientid on success and on replay */ 3330 /* Renew the clientid on success and on replay */
3327 release_session_client(cs->session); 3331 release_session_client(cs->session);
3332 nfsd4_put_session(cs->session);
3328 } 3333 }
3329 return 1; 3334 return 1;
3330} 3335}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 508941c23af7..b53b1d042f1f 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -949,15 +949,12 @@ static ssize_t __write_ports_addfd(char *buf)
949 if (err != 0) 949 if (err != 0)
950 return err; 950 return err;
951 951
952 err = lockd_up();
953 if (err != 0)
954 goto out;
955
956 err = svc_addsock(nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT); 952 err = svc_addsock(nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT);
957 if (err < 0) 953 if (err < 0) {
958 lockd_down(); 954 svc_destroy(nfsd_serv);
955 return err;
956 }
959 957
960out:
961 /* Decrease the count, but don't shut down the service */ 958 /* Decrease the count, but don't shut down the service */
962 nfsd_serv->sv_nrthreads--; 959 nfsd_serv->sv_nrthreads--;
963 return err; 960 return err;
@@ -978,9 +975,6 @@ static ssize_t __write_ports_delfd(char *buf)
978 if (nfsd_serv != NULL) 975 if (nfsd_serv != NULL)
979 len = svc_sock_names(nfsd_serv, buf, 976 len = svc_sock_names(nfsd_serv, buf,
980 SIMPLE_TRANSACTION_LIMIT, toclose); 977 SIMPLE_TRANSACTION_LIMIT, toclose);
981 if (len >= 0)
982 lockd_down();
983
984 kfree(toclose); 978 kfree(toclose);
985 return len; 979 return len;
986} 980}
@@ -1014,6 +1008,9 @@ static ssize_t __write_ports_addxprt(char *buf)
1014 PF_INET6, port, SVC_SOCK_ANONYMOUS); 1008 PF_INET6, port, SVC_SOCK_ANONYMOUS);
1015 if (err < 0 && err != -EAFNOSUPPORT) 1009 if (err < 0 && err != -EAFNOSUPPORT)
1016 goto out_close; 1010 goto out_close;
1011
1012 /* Decrease the count, but don't shut down the service */
1013 nfsd_serv->sv_nrthreads--;
1017 return 0; 1014 return 0;
1018out_close: 1015out_close:
1019 xprt = svc_find_xprt(nfsd_serv, transport, PF_INET, port); 1016 xprt = svc_find_xprt(nfsd_serv, transport, PF_INET, port);
@@ -1022,8 +1019,7 @@ out_close:
1022 svc_xprt_put(xprt); 1019 svc_xprt_put(xprt);
1023 } 1020 }
1024out_err: 1021out_err:
1025 /* Decrease the count, but don't shut down the service */ 1022 svc_destroy(nfsd_serv);
1026 nfsd_serv->sv_nrthreads--;
1027 return err; 1023 return err;
1028} 1024}
1029 1025
@@ -1194,7 +1190,7 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
1194 bsize = NFSSVC_MAXBLKSIZE; 1190 bsize = NFSSVC_MAXBLKSIZE;
1195 bsize &= ~(1024-1); 1191 bsize &= ~(1024-1);
1196 mutex_lock(&nfsd_mutex); 1192 mutex_lock(&nfsd_mutex);
1197 if (nfsd_serv && nfsd_serv->sv_nrthreads) { 1193 if (nfsd_serv) {
1198 mutex_unlock(&nfsd_mutex); 1194 mutex_unlock(&nfsd_mutex);
1199 return -EBUSY; 1195 return -EBUSY;
1200 } 1196 }
@@ -1310,6 +1306,8 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size)
1310 return -EINVAL; 1306 return -EINVAL;
1311 1307
1312 status = nfs4_reset_recoverydir(recdir); 1308 status = nfs4_reset_recoverydir(recdir);
1309 if (status)
1310 return status;
1313 } 1311 }
1314 1312
1315 return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%s\n", 1313 return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%s\n",
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 72377761270e..b76ac3a82e39 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -153,6 +153,7 @@ void nfsd_lockd_shutdown(void);
153#define nfserr_bad_seqid cpu_to_be32(NFSERR_BAD_SEQID) 153#define nfserr_bad_seqid cpu_to_be32(NFSERR_BAD_SEQID)
154#define nfserr_symlink cpu_to_be32(NFSERR_SYMLINK) 154#define nfserr_symlink cpu_to_be32(NFSERR_SYMLINK)
155#define nfserr_not_same cpu_to_be32(NFSERR_NOT_SAME) 155#define nfserr_not_same cpu_to_be32(NFSERR_NOT_SAME)
156#define nfserr_lock_range cpu_to_be32(NFSERR_LOCK_RANGE)
156#define nfserr_restorefh cpu_to_be32(NFSERR_RESTOREFH) 157#define nfserr_restorefh cpu_to_be32(NFSERR_RESTOREFH)
157#define nfserr_attrnotsupp cpu_to_be32(NFSERR_ATTRNOTSUPP) 158#define nfserr_attrnotsupp cpu_to_be32(NFSERR_ATTRNOTSUPP)
158#define nfserr_bad_xdr cpu_to_be32(NFSERR_BAD_XDR) 159#define nfserr_bad_xdr cpu_to_be32(NFSERR_BAD_XDR)
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index a047ad6111ef..08e17264784b 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -144,7 +144,7 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
144 svc_reserve_auth(rqstp, (19<<2) + argp->count + 4); 144 svc_reserve_auth(rqstp, (19<<2) + argp->count + 4);
145 145
146 resp->count = argp->count; 146 resp->count = argp->count;
147 nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, 147 nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh),
148 argp->offset, 148 argp->offset,
149 rqstp->rq_vec, argp->vlen, 149 rqstp->rq_vec, argp->vlen,
150 &resp->count); 150 &resp->count);
@@ -290,7 +290,6 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
290 * gospel of sun micro 290 * gospel of sun micro
291 */ 291 */
292 if (type != S_IFREG) { 292 if (type != S_IFREG) {
293 int is_borc = 0;
294 if (type != S_IFBLK && type != S_IFCHR) { 293 if (type != S_IFBLK && type != S_IFCHR) {
295 rdev = 0; 294 rdev = 0;
296 } else if (type == S_IFCHR && !(attr->ia_valid & ATTR_SIZE)) { 295 } else if (type == S_IFCHR && !(attr->ia_valid & ATTR_SIZE)) {
@@ -298,7 +297,6 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
298 type = S_IFIFO; 297 type = S_IFIFO;
299 } else { 298 } else {
300 /* Okay, char or block special */ 299 /* Okay, char or block special */
301 is_borc = 1;
302 if (!rdev) 300 if (!rdev)
303 rdev = wanted; 301 rdev = wanted;
304 } 302 }
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 06b2a26edfe0..e2c43464f237 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -180,15 +180,80 @@ int nfsd_nrthreads(void)
180 return rv; 180 return rv;
181} 181}
182 182
183static int nfsd_init_socks(int port)
184{
185 int error;
186 if (!list_empty(&nfsd_serv->sv_permsocks))
187 return 0;
188
189 error = svc_create_xprt(nfsd_serv, "udp", PF_INET, port,
190 SVC_SOCK_DEFAULTS);
191 if (error < 0)
192 return error;
193
194 error = svc_create_xprt(nfsd_serv, "tcp", PF_INET, port,
195 SVC_SOCK_DEFAULTS);
196 if (error < 0)
197 return error;
198
199 return 0;
200}
201
202static bool nfsd_up = false;
203
204static int nfsd_startup(unsigned short port, int nrservs)
205{
206 int ret;
207
208 if (nfsd_up)
209 return 0;
210 /*
211 * Readahead param cache - will no-op if it already exists.
212 * (Note therefore results will be suboptimal if number of
213 * threads is modified after nfsd start.)
214 */
215 ret = nfsd_racache_init(2*nrservs);
216 if (ret)
217 return ret;
218 ret = nfsd_init_socks(port);
219 if (ret)
220 goto out_racache;
221 ret = lockd_up();
222 if (ret)
223 goto out_racache;
224 ret = nfs4_state_start();
225 if (ret)
226 goto out_lockd;
227 nfsd_up = true;
228 return 0;
229out_lockd:
230 lockd_down();
231out_racache:
232 nfsd_racache_shutdown();
233 return ret;
234}
235
236static void nfsd_shutdown(void)
237{
238 /*
239 * write_ports can create the server without actually starting
240 * any threads--if we get shut down before any threads are
241 * started, then nfsd_last_thread will be run before any of this
242 * other initialization has been done.
243 */
244 if (!nfsd_up)
245 return;
246 nfs4_state_shutdown();
247 lockd_down();
248 nfsd_racache_shutdown();
249 nfsd_up = false;
250}
251
183static void nfsd_last_thread(struct svc_serv *serv) 252static void nfsd_last_thread(struct svc_serv *serv)
184{ 253{
185 /* When last nfsd thread exits we need to do some clean-up */ 254 /* When last nfsd thread exits we need to do some clean-up */
186 struct svc_xprt *xprt;
187 list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list)
188 lockd_down();
189 nfsd_serv = NULL; 255 nfsd_serv = NULL;
190 nfsd_racache_shutdown(); 256 nfsd_shutdown();
191 nfs4_state_shutdown();
192 257
193 printk(KERN_WARNING "nfsd: last server has exited, flushing export " 258 printk(KERN_WARNING "nfsd: last server has exited, flushing export "
194 "cache\n"); 259 "cache\n");
@@ -263,45 +328,18 @@ int nfsd_create_serv(void)
263 nfsd_max_blksize >= 8*1024*2) 328 nfsd_max_blksize >= 8*1024*2)
264 nfsd_max_blksize /= 2; 329 nfsd_max_blksize /= 2;
265 } 330 }
331 nfsd_reset_versions();
266 332
267 nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, 333 nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
268 nfsd_last_thread, nfsd, THIS_MODULE); 334 nfsd_last_thread, nfsd, THIS_MODULE);
269 if (nfsd_serv == NULL) 335 if (nfsd_serv == NULL)
270 err = -ENOMEM; 336 return -ENOMEM;
271 else
272 set_max_drc();
273 337
338 set_max_drc();
274 do_gettimeofday(&nfssvc_boot); /* record boot time */ 339 do_gettimeofday(&nfssvc_boot); /* record boot time */
275 return err; 340 return err;
276} 341}
277 342
278static int nfsd_init_socks(int port)
279{
280 int error;
281 if (!list_empty(&nfsd_serv->sv_permsocks))
282 return 0;
283
284 error = svc_create_xprt(nfsd_serv, "udp", PF_INET, port,
285 SVC_SOCK_DEFAULTS);
286 if (error < 0)
287 return error;
288
289 error = lockd_up();
290 if (error < 0)
291 return error;
292
293 error = svc_create_xprt(nfsd_serv, "tcp", PF_INET, port,
294 SVC_SOCK_DEFAULTS);
295 if (error < 0)
296 return error;
297
298 error = lockd_up();
299 if (error < 0)
300 return error;
301
302 return 0;
303}
304
305int nfsd_nrpools(void) 343int nfsd_nrpools(void)
306{ 344{
307 if (nfsd_serv == NULL) 345 if (nfsd_serv == NULL)
@@ -376,10 +414,16 @@ int nfsd_set_nrthreads(int n, int *nthreads)
376 return err; 414 return err;
377} 415}
378 416
417/*
418 * Adjust the number of threads and return the new number of threads.
419 * This is also the function that starts the server if necessary, if
420 * this is the first time nrservs is nonzero.
421 */
379int 422int
380nfsd_svc(unsigned short port, int nrservs) 423nfsd_svc(unsigned short port, int nrservs)
381{ 424{
382 int error; 425 int error;
426 bool nfsd_up_before;
383 427
384 mutex_lock(&nfsd_mutex); 428 mutex_lock(&nfsd_mutex);
385 dprintk("nfsd: creating service\n"); 429 dprintk("nfsd: creating service\n");
@@ -391,34 +435,29 @@ nfsd_svc(unsigned short port, int nrservs)
391 if (nrservs == 0 && nfsd_serv == NULL) 435 if (nrservs == 0 && nfsd_serv == NULL)
392 goto out; 436 goto out;
393 437
394 /* Readahead param cache - will no-op if it already exists */ 438 error = nfsd_create_serv();
395 error = nfsd_racache_init(2*nrservs);
396 if (error<0)
397 goto out;
398 error = nfs4_state_start();
399 if (error) 439 if (error)
400 goto out; 440 goto out;
401 441
402 nfsd_reset_versions(); 442 nfsd_up_before = nfsd_up;
403
404 error = nfsd_create_serv();
405 443
444 error = nfsd_startup(port, nrservs);
406 if (error) 445 if (error)
407 goto out; 446 goto out_destroy;
408 error = nfsd_init_socks(port);
409 if (error)
410 goto failure;
411
412 error = svc_set_num_threads(nfsd_serv, NULL, nrservs); 447 error = svc_set_num_threads(nfsd_serv, NULL, nrservs);
413 if (error == 0) 448 if (error)
414 /* We are holding a reference to nfsd_serv which 449 goto out_shutdown;
415 * we don't want to count in the return value, 450 /* We are holding a reference to nfsd_serv which
416 * so subtract 1 451 * we don't want to count in the return value,
417 */ 452 * so subtract 1
418 error = nfsd_serv->sv_nrthreads - 1; 453 */
419 failure: 454 error = nfsd_serv->sv_nrthreads - 1;
455out_shutdown:
456 if (error < 0 && !nfsd_up_before)
457 nfsd_shutdown();
458out_destroy:
420 svc_destroy(nfsd_serv); /* Release server */ 459 svc_destroy(nfsd_serv); /* Release server */
421 out: 460out:
422 mutex_unlock(&nfsd_mutex); 461 mutex_unlock(&nfsd_mutex);
423 return error; 462 return error;
424} 463}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 006c84230c7c..7731a75971dd 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -88,7 +88,6 @@ struct nfs4_delegation {
88 struct nfs4_client *dl_client; 88 struct nfs4_client *dl_client;
89 struct nfs4_file *dl_file; 89 struct nfs4_file *dl_file;
90 struct file_lock *dl_flock; 90 struct file_lock *dl_flock;
91 struct file *dl_vfs_file;
92 u32 dl_type; 91 u32 dl_type;
93 time_t dl_time; 92 time_t dl_time;
94/* For recall: */ 93/* For recall: */
@@ -342,12 +341,50 @@ struct nfs4_file {
342 struct list_head fi_hash; /* hash by "struct inode *" */ 341 struct list_head fi_hash; /* hash by "struct inode *" */
343 struct list_head fi_stateids; 342 struct list_head fi_stateids;
344 struct list_head fi_delegations; 343 struct list_head fi_delegations;
344 /* One each for O_RDONLY, O_WRONLY, O_RDWR: */
345 struct file * fi_fds[3];
346 /* One each for O_RDONLY, O_WRONLY: */
347 atomic_t fi_access[2];
348 /*
349 * Each open stateid contributes 1 to either fi_readers or
350 * fi_writers, or both, depending on the open mode. A
351 * delegation also takes an fi_readers reference. Lock
352 * stateid's take none.
353 */
354 atomic_t fi_readers;
355 atomic_t fi_writers;
345 struct inode *fi_inode; 356 struct inode *fi_inode;
346 u32 fi_id; /* used with stateowner->so_id 357 u32 fi_id; /* used with stateowner->so_id
347 * for stateid_hashtbl hash */ 358 * for stateid_hashtbl hash */
348 bool fi_had_conflict; 359 bool fi_had_conflict;
349}; 360};
350 361
362/* XXX: for first cut may fall back on returning file that doesn't work
363 * at all? */
364static inline struct file *find_writeable_file(struct nfs4_file *f)
365{
366 if (f->fi_fds[O_RDWR])
367 return f->fi_fds[O_RDWR];
368 return f->fi_fds[O_WRONLY];
369}
370
371static inline struct file *find_readable_file(struct nfs4_file *f)
372{
373 if (f->fi_fds[O_RDWR])
374 return f->fi_fds[O_RDWR];
375 return f->fi_fds[O_RDONLY];
376}
377
378static inline struct file *find_any_file(struct nfs4_file *f)
379{
380 if (f->fi_fds[O_RDWR])
381 return f->fi_fds[O_RDWR];
382 else if (f->fi_fds[O_RDWR])
383 return f->fi_fds[O_WRONLY];
384 else
385 return f->fi_fds[O_RDONLY];
386}
387
351/* 388/*
352* nfs4_stateid can either be an open stateid or (eventually) a lock stateid 389* nfs4_stateid can either be an open stateid or (eventually) a lock stateid
353* 390*
@@ -373,7 +410,6 @@ struct nfs4_stateid {
373 struct nfs4_stateowner * st_stateowner; 410 struct nfs4_stateowner * st_stateowner;
374 struct nfs4_file * st_file; 411 struct nfs4_file * st_file;
375 stateid_t st_stateid; 412 stateid_t st_stateid;
376 struct file * st_vfs_file;
377 unsigned long st_access_bmap; 413 unsigned long st_access_bmap;
378 unsigned long st_deny_bmap; 414 unsigned long st_deny_bmap;
379 struct nfs4_stateid * st_openstp; 415 struct nfs4_stateid * st_openstp;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 16114a8e79d4..96360a83cb91 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -604,7 +604,7 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac
604 return error; 604 return error;
605} 605}
606 606
607#endif /* defined(CONFIG_NFS_V4) */ 607#endif /* defined(CONFIG_NFSD_V4) */
608 608
609#ifdef CONFIG_NFSD_V3 609#ifdef CONFIG_NFSD_V3
610/* 610/*
@@ -903,7 +903,6 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
903 loff_t offset, struct kvec *vec, int vlen, unsigned long *count) 903 loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
904{ 904{
905 struct inode *inode; 905 struct inode *inode;
906 struct raparms *ra;
907 mm_segment_t oldfs; 906 mm_segment_t oldfs;
908 __be32 err; 907 __be32 err;
909 int host_err; 908 int host_err;
@@ -914,12 +913,6 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
914 if (svc_msnfs(fhp) && !lock_may_read(inode, offset, *count)) 913 if (svc_msnfs(fhp) && !lock_may_read(inode, offset, *count))
915 goto out; 914 goto out;
916 915
917 /* Get readahead parameters */
918 ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
919
920 if (ra && ra->p_set)
921 file->f_ra = ra->p_ra;
922
923 if (file->f_op->splice_read && rqstp->rq_splice_ok) { 916 if (file->f_op->splice_read && rqstp->rq_splice_ok) {
924 struct splice_desc sd = { 917 struct splice_desc sd = {
925 .len = 0, 918 .len = 0,
@@ -937,16 +930,6 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
937 set_fs(oldfs); 930 set_fs(oldfs);
938 } 931 }
939 932
940 /* Write back readahead params */
941 if (ra) {
942 struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
943 spin_lock(&rab->pb_lock);
944 ra->p_ra = file->f_ra;
945 ra->p_set = 1;
946 ra->p_count--;
947 spin_unlock(&rab->pb_lock);
948 }
949
950 if (host_err >= 0) { 933 if (host_err >= 0) {
951 nfsdstats.io_read += host_err; 934 nfsdstats.io_read += host_err;
952 *count = host_err; 935 *count = host_err;
@@ -1086,8 +1069,45 @@ out:
1086 * on entry. On return, *count contains the number of bytes actually read. 1069 * on entry. On return, *count contains the number of bytes actually read.
1087 * N.B. After this call fhp needs an fh_put 1070 * N.B. After this call fhp needs an fh_put
1088 */ 1071 */
1072__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
1073 loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
1074{
1075 struct file *file;
1076 struct inode *inode;
1077 struct raparms *ra;
1078 __be32 err;
1079
1080 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
1081 if (err)
1082 return err;
1083
1084 inode = file->f_path.dentry->d_inode;
1085
1086 /* Get readahead parameters */
1087 ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
1088
1089 if (ra && ra->p_set)
1090 file->f_ra = ra->p_ra;
1091
1092 err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
1093
1094 /* Write back readahead params */
1095 if (ra) {
1096 struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
1097 spin_lock(&rab->pb_lock);
1098 ra->p_ra = file->f_ra;
1099 ra->p_set = 1;
1100 ra->p_count--;
1101 spin_unlock(&rab->pb_lock);
1102 }
1103
1104 nfsd_close(file);
1105 return err;
1106}
1107
1108/* As above, but use the provided file descriptor. */
1089__be32 1109__be32
1090nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, 1110nfsd_read_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1091 loff_t offset, struct kvec *vec, int vlen, 1111 loff_t offset, struct kvec *vec, int vlen,
1092 unsigned long *count) 1112 unsigned long *count)
1093{ 1113{
@@ -1099,13 +1119,8 @@ nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1099 if (err) 1119 if (err)
1100 goto out; 1120 goto out;
1101 err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); 1121 err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
1102 } else { 1122 } else /* Note file may still be NULL in NFSv4 special stateid case: */
1103 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); 1123 err = nfsd_read(rqstp, fhp, offset, vec, vlen, count);
1104 if (err)
1105 goto out;
1106 err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
1107 nfsd_close(file);
1108 }
1109out: 1124out:
1110 return err; 1125 return err;
1111} 1126}
@@ -1631,7 +1646,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1631 char *name, int len, struct svc_fh *tfhp) 1646 char *name, int len, struct svc_fh *tfhp)
1632{ 1647{
1633 struct dentry *ddir, *dnew, *dold; 1648 struct dentry *ddir, *dnew, *dold;
1634 struct inode *dirp, *dest; 1649 struct inode *dirp;
1635 __be32 err; 1650 __be32 err;
1636 int host_err; 1651 int host_err;
1637 1652
@@ -1659,7 +1674,6 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1659 goto out_nfserr; 1674 goto out_nfserr;
1660 1675
1661 dold = tfhp->fh_dentry; 1676 dold = tfhp->fh_dentry;
1662 dest = dold->d_inode;
1663 1677
1664 host_err = mnt_want_write(tfhp->fh_export->ex_path.mnt); 1678 host_err = mnt_want_write(tfhp->fh_export->ex_path.mnt);
1665 if (host_err) { 1679 if (host_err) {
@@ -2019,8 +2033,14 @@ out:
2019__be32 2033__be32
2020nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access) 2034nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access)
2021{ 2035{
2022 __be32 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access); 2036 struct path path = {
2023 if (!err && vfs_statfs(fhp->fh_dentry,stat)) 2037 .mnt = fhp->fh_export->ex_path.mnt,
2038 .dentry = fhp->fh_dentry,
2039 };
2040 __be32 err;
2041
2042 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access);
2043 if (!err && vfs_statfs(&path, stat))
2024 err = nfserr_io; 2044 err = nfserr_io;
2025 return err; 2045 return err;
2026} 2046}
@@ -2038,7 +2058,6 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
2038 struct dentry *dentry, int acc) 2058 struct dentry *dentry, int acc)
2039{ 2059{
2040 struct inode *inode = dentry->d_inode; 2060 struct inode *inode = dentry->d_inode;
2041 struct path path;
2042 int err; 2061 int err;
2043 2062
2044 if (acc == NFSD_MAY_NOP) 2063 if (acc == NFSD_MAY_NOP)
@@ -2111,15 +2130,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
2111 if (err == -EACCES && S_ISREG(inode->i_mode) && 2130 if (err == -EACCES && S_ISREG(inode->i_mode) &&
2112 acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE)) 2131 acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE))
2113 err = inode_permission(inode, MAY_EXEC); 2132 err = inode_permission(inode, MAY_EXEC);
2114 if (err)
2115 goto nfsd_out;
2116 2133
2117 /* Do integrity (permission) checking now, but defer incrementing
2118 * IMA counts to the actual file open.
2119 */
2120 path.mnt = exp->ex_path.mnt;
2121 path.dentry = dentry;
2122nfsd_out:
2123 return err? nfserrno(err) : 0; 2134 return err? nfserrno(err) : 0;
2124} 2135}
2125 2136
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 217a62c2a357..9a370a5e36b7 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -64,7 +64,9 @@ __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
64__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, int, 64__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, int,
65 int, struct file **); 65 int, struct file **);
66void nfsd_close(struct file *); 66void nfsd_close(struct file *);
67__be32 nfsd_read(struct svc_rqst *, struct svc_fh *, struct file *, 67__be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
68 loff_t, struct kvec *, int, unsigned long *);
69__be32 nfsd_read_file(struct svc_rqst *, struct svc_fh *, struct file *,
68 loff_t, struct kvec *, int, unsigned long *); 70 loff_t, struct kvec *, int, unsigned long *);
69__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *, 71__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
70 loff_t, struct kvec *,int, unsigned long *, int *); 72 loff_t, struct kvec *,int, unsigned long *, int *);
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index effdbdbe6c11..3dbdc1d356bf 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -26,6 +26,8 @@
26#include "nilfs.h" 26#include "nilfs.h"
27#include "bmap.h" 27#include "bmap.h"
28#include "sb.h" 28#include "sb.h"
29#include "btree.h"
30#include "direct.h"
29#include "btnode.h" 31#include "btnode.h"
30#include "mdt.h" 32#include "mdt.h"
31#include "dat.h" 33#include "dat.h"
@@ -533,7 +535,7 @@ void nilfs_bmap_init_gc(struct nilfs_bmap *bmap)
533 535
534void nilfs_bmap_init_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap) 536void nilfs_bmap_init_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap)
535{ 537{
536 memcpy(gcbmap, bmap, sizeof(union nilfs_bmap_union)); 538 memcpy(gcbmap, bmap, sizeof(*bmap));
537 init_rwsem(&gcbmap->b_sem); 539 init_rwsem(&gcbmap->b_sem);
538 lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); 540 lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key);
539 gcbmap->b_inode = &NILFS_BMAP_I(gcbmap)->vfs_inode; 541 gcbmap->b_inode = &NILFS_BMAP_I(gcbmap)->vfs_inode;
@@ -541,7 +543,7 @@ void nilfs_bmap_init_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap)
541 543
542void nilfs_bmap_commit_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap) 544void nilfs_bmap_commit_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap)
543{ 545{
544 memcpy(bmap, gcbmap, sizeof(union nilfs_bmap_union)); 546 memcpy(bmap, gcbmap, sizeof(*bmap));
545 init_rwsem(&bmap->b_sem); 547 init_rwsem(&bmap->b_sem);
546 lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); 548 lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key);
547 bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; 549 bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode;
diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h
index 9980d7dbab91..a20569b19929 100644
--- a/fs/nilfs2/bmap.h
+++ b/fs/nilfs2/bmap.h
@@ -32,11 +32,6 @@
32 32
33#define NILFS_BMAP_INVALID_PTR 0 33#define NILFS_BMAP_INVALID_PTR 0
34 34
35#define nilfs_bmap_dkey_to_key(dkey) le64_to_cpu(dkey)
36#define nilfs_bmap_key_to_dkey(key) cpu_to_le64(key)
37#define nilfs_bmap_dptr_to_ptr(dptr) le64_to_cpu(dptr)
38#define nilfs_bmap_ptr_to_dptr(ptr) cpu_to_le64(ptr)
39
40#define nilfs_bmap_keydiff_abs(diff) ((diff) < 0 ? -(diff) : (diff)) 35#define nilfs_bmap_keydiff_abs(diff) ((diff) < 0 ? -(diff) : (diff))
41 36
42 37
@@ -71,7 +66,7 @@ struct nilfs_bmap_operations {
71 int (*bop_delete)(struct nilfs_bmap *, __u64); 66 int (*bop_delete)(struct nilfs_bmap *, __u64);
72 void (*bop_clear)(struct nilfs_bmap *); 67 void (*bop_clear)(struct nilfs_bmap *);
73 68
74 int (*bop_propagate)(const struct nilfs_bmap *, struct buffer_head *); 69 int (*bop_propagate)(struct nilfs_bmap *, struct buffer_head *);
75 void (*bop_lookup_dirty_buffers)(struct nilfs_bmap *, 70 void (*bop_lookup_dirty_buffers)(struct nilfs_bmap *,
76 struct list_head *); 71 struct list_head *);
77 72
@@ -110,6 +105,7 @@ static inline int nilfs_bmap_is_new_ptr(unsigned long ptr)
110 * @b_last_allocated_ptr: last allocated ptr for data block 105 * @b_last_allocated_ptr: last allocated ptr for data block
111 * @b_ptr_type: pointer type 106 * @b_ptr_type: pointer type
112 * @b_state: state 107 * @b_state: state
108 * @b_nchildren_per_block: maximum number of child nodes for non-root nodes
113 */ 109 */
114struct nilfs_bmap { 110struct nilfs_bmap {
115 union { 111 union {
@@ -123,6 +119,7 @@ struct nilfs_bmap {
123 __u64 b_last_allocated_ptr; 119 __u64 b_last_allocated_ptr;
124 int b_ptr_type; 120 int b_ptr_type;
125 int b_state; 121 int b_state;
122 __u16 b_nchildren_per_block;
126}; 123};
127 124
128/* pointer type */ 125/* pointer type */
@@ -224,6 +221,13 @@ static inline void nilfs_bmap_abort_end_ptr(struct nilfs_bmap *bmap,
224 nilfs_dat_abort_end(dat, &req->bpr_req); 221 nilfs_dat_abort_end(dat, &req->bpr_req);
225} 222}
226 223
224static inline void nilfs_bmap_set_target_v(struct nilfs_bmap *bmap, __u64 key,
225 __u64 ptr)
226{
227 bmap->b_last_allocated_key = key;
228 bmap->b_last_allocated_ptr = ptr;
229}
230
227__u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *, 231__u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *,
228 const struct buffer_head *); 232 const struct buffer_head *);
229 233
diff --git a/fs/nilfs2/bmap_union.h b/fs/nilfs2/bmap_union.h
deleted file mode 100644
index d41509bff47b..000000000000
--- a/fs/nilfs2/bmap_union.h
+++ /dev/null
@@ -1,42 +0,0 @@
1/*
2 * bmap_union.h - NILFS block mapping.
3 *
4 * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Koji Sato <koji@osrg.net>.
21 */
22
23#ifndef _NILFS_BMAP_UNION_H
24#define _NILFS_BMAP_UNION_H
25
26#include "bmap.h"
27#include "direct.h"
28#include "btree.h"
29
30/**
31 * nilfs_bmap_union -
32 * @bi_bmap: bmap structure
33 * @bi_btree: direct map structure
34 * @bi_direct: B-tree structure
35 */
36union nilfs_bmap_union {
37 struct nilfs_bmap bi_bmap;
38 struct nilfs_direct bi_direct;
39 struct nilfs_btree bi_btree;
40};
41
42#endif /* _NILFS_BMAP_UNION_H */
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 447ce47a3306..f78ab1044d1d 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -96,10 +96,12 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
96} 96}
97 97
98int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, 98int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
99 sector_t pblocknr, struct buffer_head **pbh) 99 sector_t pblocknr, int mode,
100 struct buffer_head **pbh, sector_t *submit_ptr)
100{ 101{
101 struct buffer_head *bh; 102 struct buffer_head *bh;
102 struct inode *inode = NILFS_BTNC_I(btnc); 103 struct inode *inode = NILFS_BTNC_I(btnc);
104 struct page *page;
103 int err; 105 int err;
104 106
105 bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node); 107 bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node);
@@ -107,6 +109,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
107 return -ENOMEM; 109 return -ENOMEM;
108 110
109 err = -EEXIST; /* internal code */ 111 err = -EEXIST; /* internal code */
112 page = bh->b_page;
110 113
111 if (buffer_uptodate(bh) || buffer_dirty(bh)) 114 if (buffer_uptodate(bh) || buffer_dirty(bh))
112 goto found; 115 goto found;
@@ -125,7 +128,16 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
125 } 128 }
126 } 129 }
127 } 130 }
128 lock_buffer(bh); 131
132 if (mode == READA) {
133 if (pblocknr != *submit_ptr + 1 || !trylock_buffer(bh)) {
134 err = -EBUSY; /* internal code */
135 brelse(bh);
136 goto out_locked;
137 }
138 } else { /* mode == READ */
139 lock_buffer(bh);
140 }
129 if (buffer_uptodate(bh)) { 141 if (buffer_uptodate(bh)) {
130 unlock_buffer(bh); 142 unlock_buffer(bh);
131 err = -EEXIST; /* internal code */ 143 err = -EEXIST; /* internal code */
@@ -136,15 +148,16 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
136 bh->b_blocknr = pblocknr; /* set block address for read */ 148 bh->b_blocknr = pblocknr; /* set block address for read */
137 bh->b_end_io = end_buffer_read_sync; 149 bh->b_end_io = end_buffer_read_sync;
138 get_bh(bh); 150 get_bh(bh);
139 submit_bh(READ, bh); 151 submit_bh(mode, bh);
140 bh->b_blocknr = blocknr; /* set back to the given block address */ 152 bh->b_blocknr = blocknr; /* set back to the given block address */
153 *submit_ptr = pblocknr;
141 err = 0; 154 err = 0;
142found: 155found:
143 *pbh = bh; 156 *pbh = bh;
144 157
145out_locked: 158out_locked:
146 unlock_page(bh->b_page); 159 unlock_page(page);
147 page_cache_release(bh->b_page); 160 page_cache_release(page);
148 return err; 161 return err;
149} 162}
150 163
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 07da83f07712..79037494f1e0 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -42,8 +42,8 @@ void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
42void nilfs_btnode_cache_clear(struct address_space *); 42void nilfs_btnode_cache_clear(struct address_space *);
43struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, 43struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
44 __u64 blocknr); 44 __u64 blocknr);
45int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t, 45int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t, int,
46 struct buffer_head **); 46 struct buffer_head **, sector_t *);
47void nilfs_btnode_delete(struct buffer_head *); 47void nilfs_btnode_delete(struct buffer_head *);
48int nilfs_btnode_prepare_change_key(struct address_space *, 48int nilfs_btnode_prepare_change_key(struct address_space *,
49 struct nilfs_btnode_chkey_ctxt *); 49 struct nilfs_btnode_chkey_ctxt *);
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index b27a342c5af6..300c2bc00c3f 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -66,30 +66,10 @@ static void nilfs_btree_free_path(struct nilfs_btree_path *path)
66/* 66/*
67 * B-tree node operations 67 * B-tree node operations
68 */ 68 */
69static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr, 69static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
70 struct buffer_head **bhp)
71{
72 struct address_space *btnc =
73 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
74 int err;
75
76 err = nilfs_btnode_submit_block(btnc, ptr, 0, bhp);
77 if (err)
78 return err == -EEXIST ? 0 : err;
79
80 wait_on_buffer(*bhp);
81 if (!buffer_uptodate(*bhp)) {
82 brelse(*bhp);
83 return -EIO;
84 }
85 return 0;
86}
87
88static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
89 __u64 ptr, struct buffer_head **bhp) 70 __u64 ptr, struct buffer_head **bhp)
90{ 71{
91 struct address_space *btnc = 72 struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
92 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
93 struct buffer_head *bh; 73 struct buffer_head *bh;
94 74
95 bh = nilfs_btnode_create_block(btnc, ptr); 75 bh = nilfs_btnode_create_block(btnc, ptr);
@@ -101,71 +81,55 @@ static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
101 return 0; 81 return 0;
102} 82}
103 83
104static inline int 84static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
105nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
106{ 85{
107 return node->bn_flags; 86 return node->bn_flags;
108} 87}
109 88
110static inline void 89static void
111nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags) 90nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
112{ 91{
113 node->bn_flags = flags; 92 node->bn_flags = flags;
114} 93}
115 94
116static inline int nilfs_btree_node_root(const struct nilfs_btree_node *node) 95static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
117{ 96{
118 return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT; 97 return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
119} 98}
120 99
121static inline int 100static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
122nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
123{ 101{
124 return node->bn_level; 102 return node->bn_level;
125} 103}
126 104
127static inline void 105static void
128nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level) 106nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
129{ 107{
130 node->bn_level = level; 108 node->bn_level = level;
131} 109}
132 110
133static inline int 111static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
134nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
135{ 112{
136 return le16_to_cpu(node->bn_nchildren); 113 return le16_to_cpu(node->bn_nchildren);
137} 114}
138 115
139static inline void 116static void
140nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren) 117nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
141{ 118{
142 node->bn_nchildren = cpu_to_le16(nchildren); 119 node->bn_nchildren = cpu_to_le16(nchildren);
143} 120}
144 121
145static inline int nilfs_btree_node_size(const struct nilfs_btree *btree) 122static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
146{ 123{
147 return 1 << btree->bt_bmap.b_inode->i_blkbits; 124 return 1 << btree->b_inode->i_blkbits;
148} 125}
149 126
150static inline int 127static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
151nilfs_btree_node_nchildren_min(const struct nilfs_btree_node *node,
152 const struct nilfs_btree *btree)
153{ 128{
154 return nilfs_btree_node_root(node) ? 129 return btree->b_nchildren_per_block;
155 NILFS_BTREE_ROOT_NCHILDREN_MIN :
156 NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
157} 130}
158 131
159static inline int 132static __le64 *
160nilfs_btree_node_nchildren_max(const struct nilfs_btree_node *node,
161 const struct nilfs_btree *btree)
162{
163 return nilfs_btree_node_root(node) ?
164 NILFS_BTREE_ROOT_NCHILDREN_MAX :
165 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
166}
167
168static inline __le64 *
169nilfs_btree_node_dkeys(const struct nilfs_btree_node *node) 133nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
170{ 134{
171 return (__le64 *)((char *)(node + 1) + 135 return (__le64 *)((char *)(node + 1) +
@@ -173,45 +137,40 @@ nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
173 0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE)); 137 0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
174} 138}
175 139
176static inline __le64 * 140static __le64 *
177nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, 141nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
178 const struct nilfs_btree *btree)
179{ 142{
180 return (__le64 *)(nilfs_btree_node_dkeys(node) + 143 return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
181 nilfs_btree_node_nchildren_max(node, btree));
182} 144}
183 145
184static inline __u64 146static __u64
185nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index) 147nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
186{ 148{
187 return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(node) + index)); 149 return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
188} 150}
189 151
190static inline void 152static void
191nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key) 153nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
192{ 154{
193 *(nilfs_btree_node_dkeys(node) + index) = nilfs_bmap_key_to_dkey(key); 155 *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
194} 156}
195 157
196static inline __u64 158static __u64
197nilfs_btree_node_get_ptr(const struct nilfs_btree *btree, 159nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
198 const struct nilfs_btree_node *node, int index) 160 int ncmax)
199{ 161{
200 return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(node, btree) + 162 return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
201 index));
202} 163}
203 164
204static inline void 165static void
205nilfs_btree_node_set_ptr(struct nilfs_btree *btree, 166nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
206 struct nilfs_btree_node *node, int index, __u64 ptr) 167 int ncmax)
207{ 168{
208 *(nilfs_btree_node_dptrs(node, btree) + index) = 169 *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
209 nilfs_bmap_ptr_to_dptr(ptr);
210} 170}
211 171
212static void nilfs_btree_node_init(struct nilfs_btree *btree, 172static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
213 struct nilfs_btree_node *node, 173 int level, int nchildren, int ncmax,
214 int flags, int level, int nchildren,
215 const __u64 *keys, const __u64 *ptrs) 174 const __u64 *keys, const __u64 *ptrs)
216{ 175{
217 __le64 *dkeys; 176 __le64 *dkeys;
@@ -223,29 +182,28 @@ static void nilfs_btree_node_init(struct nilfs_btree *btree,
223 nilfs_btree_node_set_nchildren(node, nchildren); 182 nilfs_btree_node_set_nchildren(node, nchildren);
224 183
225 dkeys = nilfs_btree_node_dkeys(node); 184 dkeys = nilfs_btree_node_dkeys(node);
226 dptrs = nilfs_btree_node_dptrs(node, btree); 185 dptrs = nilfs_btree_node_dptrs(node, ncmax);
227 for (i = 0; i < nchildren; i++) { 186 for (i = 0; i < nchildren; i++) {
228 dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]); 187 dkeys[i] = cpu_to_le64(keys[i]);
229 dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]); 188 dptrs[i] = cpu_to_le64(ptrs[i]);
230 } 189 }
231} 190}
232 191
233/* Assume the buffer heads corresponding to left and right are locked. */ 192/* Assume the buffer heads corresponding to left and right are locked. */
234static void nilfs_btree_node_move_left(struct nilfs_btree *btree, 193static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
235 struct nilfs_btree_node *left,
236 struct nilfs_btree_node *right, 194 struct nilfs_btree_node *right,
237 int n) 195 int n, int lncmax, int rncmax)
238{ 196{
239 __le64 *ldkeys, *rdkeys; 197 __le64 *ldkeys, *rdkeys;
240 __le64 *ldptrs, *rdptrs; 198 __le64 *ldptrs, *rdptrs;
241 int lnchildren, rnchildren; 199 int lnchildren, rnchildren;
242 200
243 ldkeys = nilfs_btree_node_dkeys(left); 201 ldkeys = nilfs_btree_node_dkeys(left);
244 ldptrs = nilfs_btree_node_dptrs(left, btree); 202 ldptrs = nilfs_btree_node_dptrs(left, lncmax);
245 lnchildren = nilfs_btree_node_get_nchildren(left); 203 lnchildren = nilfs_btree_node_get_nchildren(left);
246 204
247 rdkeys = nilfs_btree_node_dkeys(right); 205 rdkeys = nilfs_btree_node_dkeys(right);
248 rdptrs = nilfs_btree_node_dptrs(right, btree); 206 rdptrs = nilfs_btree_node_dptrs(right, rncmax);
249 rnchildren = nilfs_btree_node_get_nchildren(right); 207 rnchildren = nilfs_btree_node_get_nchildren(right);
250 208
251 memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys)); 209 memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
@@ -260,21 +218,20 @@ static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
260} 218}
261 219
262/* Assume that the buffer heads corresponding to left and right are locked. */ 220/* Assume that the buffer heads corresponding to left and right are locked. */
263static void nilfs_btree_node_move_right(struct nilfs_btree *btree, 221static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
264 struct nilfs_btree_node *left,
265 struct nilfs_btree_node *right, 222 struct nilfs_btree_node *right,
266 int n) 223 int n, int lncmax, int rncmax)
267{ 224{
268 __le64 *ldkeys, *rdkeys; 225 __le64 *ldkeys, *rdkeys;
269 __le64 *ldptrs, *rdptrs; 226 __le64 *ldptrs, *rdptrs;
270 int lnchildren, rnchildren; 227 int lnchildren, rnchildren;
271 228
272 ldkeys = nilfs_btree_node_dkeys(left); 229 ldkeys = nilfs_btree_node_dkeys(left);
273 ldptrs = nilfs_btree_node_dptrs(left, btree); 230 ldptrs = nilfs_btree_node_dptrs(left, lncmax);
274 lnchildren = nilfs_btree_node_get_nchildren(left); 231 lnchildren = nilfs_btree_node_get_nchildren(left);
275 232
276 rdkeys = nilfs_btree_node_dkeys(right); 233 rdkeys = nilfs_btree_node_dkeys(right);
277 rdptrs = nilfs_btree_node_dptrs(right, btree); 234 rdptrs = nilfs_btree_node_dptrs(right, rncmax);
278 rnchildren = nilfs_btree_node_get_nchildren(right); 235 rnchildren = nilfs_btree_node_get_nchildren(right);
279 236
280 memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys)); 237 memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
@@ -289,16 +246,15 @@ static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
289} 246}
290 247
291/* Assume that the buffer head corresponding to node is locked. */ 248/* Assume that the buffer head corresponding to node is locked. */
292static void nilfs_btree_node_insert(struct nilfs_btree *btree, 249static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
293 struct nilfs_btree_node *node, 250 __u64 key, __u64 ptr, int ncmax)
294 __u64 key, __u64 ptr, int index)
295{ 251{
296 __le64 *dkeys; 252 __le64 *dkeys;
297 __le64 *dptrs; 253 __le64 *dptrs;
298 int nchildren; 254 int nchildren;
299 255
300 dkeys = nilfs_btree_node_dkeys(node); 256 dkeys = nilfs_btree_node_dkeys(node);
301 dptrs = nilfs_btree_node_dptrs(node, btree); 257 dptrs = nilfs_btree_node_dptrs(node, ncmax);
302 nchildren = nilfs_btree_node_get_nchildren(node); 258 nchildren = nilfs_btree_node_get_nchildren(node);
303 if (index < nchildren) { 259 if (index < nchildren) {
304 memmove(dkeys + index + 1, dkeys + index, 260 memmove(dkeys + index + 1, dkeys + index,
@@ -306,16 +262,15 @@ static void nilfs_btree_node_insert(struct nilfs_btree *btree,
306 memmove(dptrs + index + 1, dptrs + index, 262 memmove(dptrs + index + 1, dptrs + index,
307 (nchildren - index) * sizeof(*dptrs)); 263 (nchildren - index) * sizeof(*dptrs));
308 } 264 }
309 dkeys[index] = nilfs_bmap_key_to_dkey(key); 265 dkeys[index] = cpu_to_le64(key);
310 dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr); 266 dptrs[index] = cpu_to_le64(ptr);
311 nchildren++; 267 nchildren++;
312 nilfs_btree_node_set_nchildren(node, nchildren); 268 nilfs_btree_node_set_nchildren(node, nchildren);
313} 269}
314 270
315/* Assume that the buffer head corresponding to node is locked. */ 271/* Assume that the buffer head corresponding to node is locked. */
316static void nilfs_btree_node_delete(struct nilfs_btree *btree, 272static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
317 struct nilfs_btree_node *node, 273 __u64 *keyp, __u64 *ptrp, int ncmax)
318 __u64 *keyp, __u64 *ptrp, int index)
319{ 274{
320 __u64 key; 275 __u64 key;
321 __u64 ptr; 276 __u64 ptr;
@@ -324,9 +279,9 @@ static void nilfs_btree_node_delete(struct nilfs_btree *btree,
324 int nchildren; 279 int nchildren;
325 280
326 dkeys = nilfs_btree_node_dkeys(node); 281 dkeys = nilfs_btree_node_dkeys(node);
327 dptrs = nilfs_btree_node_dptrs(node, btree); 282 dptrs = nilfs_btree_node_dptrs(node, ncmax);
328 key = nilfs_bmap_dkey_to_key(dkeys[index]); 283 key = le64_to_cpu(dkeys[index]);
329 ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]); 284 ptr = le64_to_cpu(dptrs[index]);
330 nchildren = nilfs_btree_node_get_nchildren(node); 285 nchildren = nilfs_btree_node_get_nchildren(node);
331 if (keyp != NULL) 286 if (keyp != NULL)
332 *keyp = key; 287 *keyp = key;
@@ -382,40 +337,92 @@ static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
382 return s == 0; 337 return s == 0;
383} 338}
384 339
385static inline struct nilfs_btree_node * 340/**
386nilfs_btree_get_root(const struct nilfs_btree *btree) 341 * nilfs_btree_node_broken - verify consistency of btree node
342 * @node: btree node block to be examined
343 * @size: node size (in bytes)
344 * @blocknr: block number
345 *
346 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
347 */
348static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
349 size_t size, sector_t blocknr)
387{ 350{
388 return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data; 351 int level, flags, nchildren;
352 int ret = 0;
353
354 level = nilfs_btree_node_get_level(node);
355 flags = nilfs_btree_node_get_flags(node);
356 nchildren = nilfs_btree_node_get_nchildren(node);
357
358 if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
359 level >= NILFS_BTREE_LEVEL_MAX ||
360 (flags & NILFS_BTREE_NODE_ROOT) ||
361 nchildren < 0 ||
362 nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
363 printk(KERN_CRIT "NILFS: bad btree node (blocknr=%llu): "
364 "level = %d, flags = 0x%x, nchildren = %d\n",
365 (unsigned long long)blocknr, level, flags, nchildren);
366 ret = 1;
367 }
368 return ret;
389} 369}
390 370
391static inline struct nilfs_btree_node * 371int nilfs_btree_broken_node_block(struct buffer_head *bh)
372{
373 int ret;
374
375 if (buffer_nilfs_checked(bh))
376 return 0;
377
378 ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
379 bh->b_size, bh->b_blocknr);
380 if (likely(!ret))
381 set_buffer_nilfs_checked(bh);
382 return ret;
383}
384
385static struct nilfs_btree_node *
386nilfs_btree_get_root(const struct nilfs_bmap *btree)
387{
388 return (struct nilfs_btree_node *)btree->b_u.u_data;
389}
390
391static struct nilfs_btree_node *
392nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level) 392nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
393{ 393{
394 return (struct nilfs_btree_node *)path[level].bp_bh->b_data; 394 return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
395} 395}
396 396
397static inline struct nilfs_btree_node * 397static struct nilfs_btree_node *
398nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level) 398nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
399{ 399{
400 return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data; 400 return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
401} 401}
402 402
403static inline int nilfs_btree_height(const struct nilfs_btree *btree) 403static int nilfs_btree_height(const struct nilfs_bmap *btree)
404{ 404{
405 return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1; 405 return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
406} 406}
407 407
408static inline struct nilfs_btree_node * 408static struct nilfs_btree_node *
409nilfs_btree_get_node(const struct nilfs_btree *btree, 409nilfs_btree_get_node(const struct nilfs_bmap *btree,
410 const struct nilfs_btree_path *path, 410 const struct nilfs_btree_path *path,
411 int level) 411 int level, int *ncmaxp)
412{ 412{
413 return (level == nilfs_btree_height(btree) - 1) ? 413 struct nilfs_btree_node *node;
414 nilfs_btree_get_root(btree) : 414
415 nilfs_btree_get_nonroot_node(path, level); 415 if (level == nilfs_btree_height(btree) - 1) {
416 node = nilfs_btree_get_root(btree);
417 *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
418 } else {
419 node = nilfs_btree_get_nonroot_node(path, level);
420 *ncmaxp = nilfs_btree_nchildren_per_block(btree);
421 }
422 return node;
416} 423}
417 424
418static inline int 425static int
419nilfs_btree_bad_node(struct nilfs_btree_node *node, int level) 426nilfs_btree_bad_node(struct nilfs_btree_node *node, int level)
420{ 427{
421 if (unlikely(nilfs_btree_node_get_level(node) != level)) { 428 if (unlikely(nilfs_btree_node_get_level(node) != level)) {
@@ -427,13 +434,83 @@ nilfs_btree_bad_node(struct nilfs_btree_node *node, int level)
427 return 0; 434 return 0;
428} 435}
429 436
430static int nilfs_btree_do_lookup(const struct nilfs_btree *btree, 437struct nilfs_btree_readahead_info {
438 struct nilfs_btree_node *node; /* parent node */
439 int max_ra_blocks; /* max nof blocks to read ahead */
440 int index; /* current index on the parent node */
441 int ncmax; /* nof children in the parent node */
442};
443
444static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
445 struct buffer_head **bhp,
446 const struct nilfs_btree_readahead_info *ra)
447{
448 struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
449 struct buffer_head *bh, *ra_bh;
450 sector_t submit_ptr = 0;
451 int ret;
452
453 ret = nilfs_btnode_submit_block(btnc, ptr, 0, READ, &bh, &submit_ptr);
454 if (ret) {
455 if (ret != -EEXIST)
456 return ret;
457 goto out_check;
458 }
459
460 if (ra) {
461 int i, n;
462 __u64 ptr2;
463
464 /* read ahead sibling nodes */
465 for (n = ra->max_ra_blocks, i = ra->index + 1;
466 n > 0 && i < ra->ncmax; n--, i++) {
467 ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
468
469 ret = nilfs_btnode_submit_block(btnc, ptr2, 0, READA,
470 &ra_bh, &submit_ptr);
471 if (likely(!ret || ret == -EEXIST))
472 brelse(ra_bh);
473 else if (ret != -EBUSY)
474 break;
475 if (!buffer_locked(bh))
476 goto out_no_wait;
477 }
478 }
479
480 wait_on_buffer(bh);
481
482 out_no_wait:
483 if (!buffer_uptodate(bh)) {
484 brelse(bh);
485 return -EIO;
486 }
487
488 out_check:
489 if (nilfs_btree_broken_node_block(bh)) {
490 clear_buffer_uptodate(bh);
491 brelse(bh);
492 return -EINVAL;
493 }
494
495 *bhp = bh;
496 return 0;
497}
498
499static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
500 struct buffer_head **bhp)
501{
502 return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
503}
504
505static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
431 struct nilfs_btree_path *path, 506 struct nilfs_btree_path *path,
432 __u64 key, __u64 *ptrp, int minlevel) 507 __u64 key, __u64 *ptrp, int minlevel,
508 int readahead)
433{ 509{
434 struct nilfs_btree_node *node; 510 struct nilfs_btree_node *node;
511 struct nilfs_btree_readahead_info p, *ra;
435 __u64 ptr; 512 __u64 ptr;
436 int level, index, found, ret; 513 int level, index, found, ncmax, ret;
437 514
438 node = nilfs_btree_get_root(btree); 515 node = nilfs_btree_get_root(btree);
439 level = nilfs_btree_node_get_level(node); 516 level = nilfs_btree_node_get_level(node);
@@ -441,14 +518,27 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
441 return -ENOENT; 518 return -ENOENT;
442 519
443 found = nilfs_btree_node_lookup(node, key, &index); 520 found = nilfs_btree_node_lookup(node, key, &index);
444 ptr = nilfs_btree_node_get_ptr(btree, node, index); 521 ptr = nilfs_btree_node_get_ptr(node, index,
522 NILFS_BTREE_ROOT_NCHILDREN_MAX);
445 path[level].bp_bh = NULL; 523 path[level].bp_bh = NULL;
446 path[level].bp_index = index; 524 path[level].bp_index = index;
447 525
448 for (level--; level >= minlevel; level--) { 526 ncmax = nilfs_btree_nchildren_per_block(btree);
449 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh); 527
528 while (--level >= minlevel) {
529 ra = NULL;
530 if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
531 p.node = nilfs_btree_get_node(btree, path, level + 1,
532 &p.ncmax);
533 p.index = index;
534 p.max_ra_blocks = 7;
535 ra = &p;
536 }
537 ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
538 ra);
450 if (ret < 0) 539 if (ret < 0)
451 return ret; 540 return ret;
541
452 node = nilfs_btree_get_nonroot_node(path, level); 542 node = nilfs_btree_get_nonroot_node(path, level);
453 if (nilfs_btree_bad_node(node, level)) 543 if (nilfs_btree_bad_node(node, level))
454 return -EINVAL; 544 return -EINVAL;
@@ -456,9 +546,9 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
456 found = nilfs_btree_node_lookup(node, key, &index); 546 found = nilfs_btree_node_lookup(node, key, &index);
457 else 547 else
458 index = 0; 548 index = 0;
459 if (index < nilfs_btree_node_nchildren_max(node, btree)) 549 if (index < ncmax) {
460 ptr = nilfs_btree_node_get_ptr(btree, node, index); 550 ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
461 else { 551 } else {
462 WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN); 552 WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
463 /* insert */ 553 /* insert */
464 ptr = NILFS_BMAP_INVALID_PTR; 554 ptr = NILFS_BMAP_INVALID_PTR;
@@ -474,22 +564,24 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
474 return 0; 564 return 0;
475} 565}
476 566
477static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree, 567static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
478 struct nilfs_btree_path *path, 568 struct nilfs_btree_path *path,
479 __u64 *keyp, __u64 *ptrp) 569 __u64 *keyp, __u64 *ptrp)
480{ 570{
481 struct nilfs_btree_node *node; 571 struct nilfs_btree_node *node;
482 __u64 ptr; 572 __u64 ptr;
483 int index, level, ret; 573 int index, level, ncmax, ret;
484 574
485 node = nilfs_btree_get_root(btree); 575 node = nilfs_btree_get_root(btree);
486 index = nilfs_btree_node_get_nchildren(node) - 1; 576 index = nilfs_btree_node_get_nchildren(node) - 1;
487 if (index < 0) 577 if (index < 0)
488 return -ENOENT; 578 return -ENOENT;
489 level = nilfs_btree_node_get_level(node); 579 level = nilfs_btree_node_get_level(node);
490 ptr = nilfs_btree_node_get_ptr(btree, node, index); 580 ptr = nilfs_btree_node_get_ptr(node, index,
581 NILFS_BTREE_ROOT_NCHILDREN_MAX);
491 path[level].bp_bh = NULL; 582 path[level].bp_bh = NULL;
492 path[level].bp_index = index; 583 path[level].bp_index = index;
584 ncmax = nilfs_btree_nchildren_per_block(btree);
493 585
494 for (level--; level > 0; level--) { 586 for (level--; level > 0; level--) {
495 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh); 587 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
@@ -499,7 +591,7 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
499 if (nilfs_btree_bad_node(node, level)) 591 if (nilfs_btree_bad_node(node, level))
500 return -EINVAL; 592 return -EINVAL;
501 index = nilfs_btree_node_get_nchildren(node) - 1; 593 index = nilfs_btree_node_get_nchildren(node) - 1;
502 ptr = nilfs_btree_node_get_ptr(btree, node, index); 594 ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
503 path[level].bp_index = index; 595 path[level].bp_index = index;
504 } 596 }
505 597
@@ -511,51 +603,45 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
511 return 0; 603 return 0;
512} 604}
513 605
514static int nilfs_btree_lookup(const struct nilfs_bmap *bmap, 606static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
515 __u64 key, int level, __u64 *ptrp) 607 __u64 key, int level, __u64 *ptrp)
516{ 608{
517 struct nilfs_btree *btree;
518 struct nilfs_btree_path *path; 609 struct nilfs_btree_path *path;
519 __u64 ptr;
520 int ret; 610 int ret;
521 611
522 btree = (struct nilfs_btree *)bmap;
523 path = nilfs_btree_alloc_path(); 612 path = nilfs_btree_alloc_path();
524 if (path == NULL) 613 if (path == NULL)
525 return -ENOMEM; 614 return -ENOMEM;
526 615
527 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level); 616 ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);
528
529 if (ptrp != NULL)
530 *ptrp = ptr;
531 617
532 nilfs_btree_free_path(path); 618 nilfs_btree_free_path(path);
533 619
534 return ret; 620 return ret;
535} 621}
536 622
537static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap, 623static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
538 __u64 key, __u64 *ptrp, unsigned maxblocks) 624 __u64 key, __u64 *ptrp, unsigned maxblocks)
539{ 625{
540 struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
541 struct nilfs_btree_path *path; 626 struct nilfs_btree_path *path;
542 struct nilfs_btree_node *node; 627 struct nilfs_btree_node *node;
543 struct inode *dat = NULL; 628 struct inode *dat = NULL;
544 __u64 ptr, ptr2; 629 __u64 ptr, ptr2;
545 sector_t blocknr; 630 sector_t blocknr;
546 int level = NILFS_BTREE_LEVEL_NODE_MIN; 631 int level = NILFS_BTREE_LEVEL_NODE_MIN;
547 int ret, cnt, index, maxlevel; 632 int ret, cnt, index, maxlevel, ncmax;
633 struct nilfs_btree_readahead_info p;
548 634
549 path = nilfs_btree_alloc_path(); 635 path = nilfs_btree_alloc_path();
550 if (path == NULL) 636 if (path == NULL)
551 return -ENOMEM; 637 return -ENOMEM;
552 638
553 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level); 639 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
554 if (ret < 0) 640 if (ret < 0)
555 goto out; 641 goto out;
556 642
557 if (NILFS_BMAP_USE_VBN(bmap)) { 643 if (NILFS_BMAP_USE_VBN(btree)) {
558 dat = nilfs_bmap_get_dat(bmap); 644 dat = nilfs_bmap_get_dat(btree);
559 ret = nilfs_dat_translate(dat, ptr, &blocknr); 645 ret = nilfs_dat_translate(dat, ptr, &blocknr);
560 if (ret < 0) 646 if (ret < 0)
561 goto out; 647 goto out;
@@ -566,14 +652,14 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
566 goto end; 652 goto end;
567 653
568 maxlevel = nilfs_btree_height(btree) - 1; 654 maxlevel = nilfs_btree_height(btree) - 1;
569 node = nilfs_btree_get_node(btree, path, level); 655 node = nilfs_btree_get_node(btree, path, level, &ncmax);
570 index = path[level].bp_index + 1; 656 index = path[level].bp_index + 1;
571 for (;;) { 657 for (;;) {
572 while (index < nilfs_btree_node_get_nchildren(node)) { 658 while (index < nilfs_btree_node_get_nchildren(node)) {
573 if (nilfs_btree_node_get_key(node, index) != 659 if (nilfs_btree_node_get_key(node, index) !=
574 key + cnt) 660 key + cnt)
575 goto end; 661 goto end;
576 ptr2 = nilfs_btree_node_get_ptr(btree, node, index); 662 ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
577 if (dat) { 663 if (dat) {
578 ret = nilfs_dat_translate(dat, ptr2, &blocknr); 664 ret = nilfs_dat_translate(dat, ptr2, &blocknr);
579 if (ret < 0) 665 if (ret < 0)
@@ -589,20 +675,24 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
589 break; 675 break;
590 676
591 /* look-up right sibling node */ 677 /* look-up right sibling node */
592 node = nilfs_btree_get_node(btree, path, level + 1); 678 p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
593 index = path[level + 1].bp_index + 1; 679 p.index = path[level + 1].bp_index + 1;
594 if (index >= nilfs_btree_node_get_nchildren(node) || 680 p.max_ra_blocks = 7;
595 nilfs_btree_node_get_key(node, index) != key + cnt) 681 if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
682 nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
596 break; 683 break;
597 ptr2 = nilfs_btree_node_get_ptr(btree, node, index); 684 ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
598 path[level + 1].bp_index = index; 685 path[level + 1].bp_index = p.index;
599 686
600 brelse(path[level].bp_bh); 687 brelse(path[level].bp_bh);
601 path[level].bp_bh = NULL; 688 path[level].bp_bh = NULL;
602 ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh); 689
690 ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
691 &p);
603 if (ret < 0) 692 if (ret < 0)
604 goto out; 693 goto out;
605 node = nilfs_btree_get_nonroot_node(path, level); 694 node = nilfs_btree_get_nonroot_node(path, level);
695 ncmax = nilfs_btree_nchildren_per_block(btree);
606 index = 0; 696 index = 0;
607 path[level].bp_index = index; 697 path[level].bp_index = index;
608 } 698 }
@@ -614,7 +704,7 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
614 return ret; 704 return ret;
615} 705}
616 706
617static void nilfs_btree_promote_key(struct nilfs_btree *btree, 707static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
618 struct nilfs_btree_path *path, 708 struct nilfs_btree_path *path,
619 int level, __u64 key) 709 int level, __u64 key)
620{ 710{
@@ -636,16 +726,18 @@ static void nilfs_btree_promote_key(struct nilfs_btree *btree,
636 } 726 }
637} 727}
638 728
639static void nilfs_btree_do_insert(struct nilfs_btree *btree, 729static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
640 struct nilfs_btree_path *path, 730 struct nilfs_btree_path *path,
641 int level, __u64 *keyp, __u64 *ptrp) 731 int level, __u64 *keyp, __u64 *ptrp)
642{ 732{
643 struct nilfs_btree_node *node; 733 struct nilfs_btree_node *node;
734 int ncblk;
644 735
645 if (level < nilfs_btree_height(btree) - 1) { 736 if (level < nilfs_btree_height(btree) - 1) {
646 node = nilfs_btree_get_nonroot_node(path, level); 737 node = nilfs_btree_get_nonroot_node(path, level);
647 nilfs_btree_node_insert(btree, node, *keyp, *ptrp, 738 ncblk = nilfs_btree_nchildren_per_block(btree);
648 path[level].bp_index); 739 nilfs_btree_node_insert(node, path[level].bp_index,
740 *keyp, *ptrp, ncblk);
649 if (!buffer_dirty(path[level].bp_bh)) 741 if (!buffer_dirty(path[level].bp_bh))
650 nilfs_btnode_mark_dirty(path[level].bp_bh); 742 nilfs_btnode_mark_dirty(path[level].bp_bh);
651 743
@@ -655,22 +747,24 @@ static void nilfs_btree_do_insert(struct nilfs_btree *btree,
655 0)); 747 0));
656 } else { 748 } else {
657 node = nilfs_btree_get_root(btree); 749 node = nilfs_btree_get_root(btree);
658 nilfs_btree_node_insert(btree, node, *keyp, *ptrp, 750 nilfs_btree_node_insert(node, path[level].bp_index,
659 path[level].bp_index); 751 *keyp, *ptrp,
752 NILFS_BTREE_ROOT_NCHILDREN_MAX);
660 } 753 }
661} 754}
662 755
663static void nilfs_btree_carry_left(struct nilfs_btree *btree, 756static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
664 struct nilfs_btree_path *path, 757 struct nilfs_btree_path *path,
665 int level, __u64 *keyp, __u64 *ptrp) 758 int level, __u64 *keyp, __u64 *ptrp)
666{ 759{
667 struct nilfs_btree_node *node, *left; 760 struct nilfs_btree_node *node, *left;
668 int nchildren, lnchildren, n, move; 761 int nchildren, lnchildren, n, move, ncblk;
669 762
670 node = nilfs_btree_get_nonroot_node(path, level); 763 node = nilfs_btree_get_nonroot_node(path, level);
671 left = nilfs_btree_get_sib_node(path, level); 764 left = nilfs_btree_get_sib_node(path, level);
672 nchildren = nilfs_btree_node_get_nchildren(node); 765 nchildren = nilfs_btree_node_get_nchildren(node);
673 lnchildren = nilfs_btree_node_get_nchildren(left); 766 lnchildren = nilfs_btree_node_get_nchildren(left);
767 ncblk = nilfs_btree_nchildren_per_block(btree);
674 move = 0; 768 move = 0;
675 769
676 n = (nchildren + lnchildren + 1) / 2 - lnchildren; 770 n = (nchildren + lnchildren + 1) / 2 - lnchildren;
@@ -680,7 +774,7 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree,
680 move = 1; 774 move = 1;
681 } 775 }
682 776
683 nilfs_btree_node_move_left(btree, left, node, n); 777 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
684 778
685 if (!buffer_dirty(path[level].bp_bh)) 779 if (!buffer_dirty(path[level].bp_bh))
686 nilfs_btnode_mark_dirty(path[level].bp_bh); 780 nilfs_btnode_mark_dirty(path[level].bp_bh);
@@ -705,17 +799,18 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree,
705 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 799 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
706} 800}
707 801
708static void nilfs_btree_carry_right(struct nilfs_btree *btree, 802static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
709 struct nilfs_btree_path *path, 803 struct nilfs_btree_path *path,
710 int level, __u64 *keyp, __u64 *ptrp) 804 int level, __u64 *keyp, __u64 *ptrp)
711{ 805{
712 struct nilfs_btree_node *node, *right; 806 struct nilfs_btree_node *node, *right;
713 int nchildren, rnchildren, n, move; 807 int nchildren, rnchildren, n, move, ncblk;
714 808
715 node = nilfs_btree_get_nonroot_node(path, level); 809 node = nilfs_btree_get_nonroot_node(path, level);
716 right = nilfs_btree_get_sib_node(path, level); 810 right = nilfs_btree_get_sib_node(path, level);
717 nchildren = nilfs_btree_node_get_nchildren(node); 811 nchildren = nilfs_btree_node_get_nchildren(node);
718 rnchildren = nilfs_btree_node_get_nchildren(right); 812 rnchildren = nilfs_btree_node_get_nchildren(right);
813 ncblk = nilfs_btree_nchildren_per_block(btree);
719 move = 0; 814 move = 0;
720 815
721 n = (nchildren + rnchildren + 1) / 2 - rnchildren; 816 n = (nchildren + rnchildren + 1) / 2 - rnchildren;
@@ -725,7 +820,7 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree,
725 move = 1; 820 move = 1;
726 } 821 }
727 822
728 nilfs_btree_node_move_right(btree, node, right, n); 823 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
729 824
730 if (!buffer_dirty(path[level].bp_bh)) 825 if (!buffer_dirty(path[level].bp_bh))
731 nilfs_btnode_mark_dirty(path[level].bp_bh); 826 nilfs_btnode_mark_dirty(path[level].bp_bh);
@@ -751,18 +846,19 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree,
751 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 846 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
752} 847}
753 848
754static void nilfs_btree_split(struct nilfs_btree *btree, 849static void nilfs_btree_split(struct nilfs_bmap *btree,
755 struct nilfs_btree_path *path, 850 struct nilfs_btree_path *path,
756 int level, __u64 *keyp, __u64 *ptrp) 851 int level, __u64 *keyp, __u64 *ptrp)
757{ 852{
758 struct nilfs_btree_node *node, *right; 853 struct nilfs_btree_node *node, *right;
759 __u64 newkey; 854 __u64 newkey;
760 __u64 newptr; 855 __u64 newptr;
761 int nchildren, n, move; 856 int nchildren, n, move, ncblk;
762 857
763 node = nilfs_btree_get_nonroot_node(path, level); 858 node = nilfs_btree_get_nonroot_node(path, level);
764 right = nilfs_btree_get_sib_node(path, level); 859 right = nilfs_btree_get_sib_node(path, level);
765 nchildren = nilfs_btree_node_get_nchildren(node); 860 nchildren = nilfs_btree_node_get_nchildren(node);
861 ncblk = nilfs_btree_nchildren_per_block(btree);
766 move = 0; 862 move = 0;
767 863
768 n = (nchildren + 1) / 2; 864 n = (nchildren + 1) / 2;
@@ -771,7 +867,7 @@ static void nilfs_btree_split(struct nilfs_btree *btree,
771 move = 1; 867 move = 1;
772 } 868 }
773 869
774 nilfs_btree_node_move_right(btree, node, right, n); 870 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
775 871
776 if (!buffer_dirty(path[level].bp_bh)) 872 if (!buffer_dirty(path[level].bp_bh))
777 nilfs_btnode_mark_dirty(path[level].bp_bh); 873 nilfs_btnode_mark_dirty(path[level].bp_bh);
@@ -783,8 +879,8 @@ static void nilfs_btree_split(struct nilfs_btree *btree,
783 879
784 if (move) { 880 if (move) {
785 path[level].bp_index -= nilfs_btree_node_get_nchildren(node); 881 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
786 nilfs_btree_node_insert(btree, right, *keyp, *ptrp, 882 nilfs_btree_node_insert(right, path[level].bp_index,
787 path[level].bp_index); 883 *keyp, *ptrp, ncblk);
788 884
789 *keyp = nilfs_btree_node_get_key(right, 0); 885 *keyp = nilfs_btree_node_get_key(right, 0);
790 *ptrp = path[level].bp_newreq.bpr_ptr; 886 *ptrp = path[level].bp_newreq.bpr_ptr;
@@ -805,19 +901,21 @@ static void nilfs_btree_split(struct nilfs_btree *btree,
805 path[level + 1].bp_index++; 901 path[level + 1].bp_index++;
806} 902}
807 903
808static void nilfs_btree_grow(struct nilfs_btree *btree, 904static void nilfs_btree_grow(struct nilfs_bmap *btree,
809 struct nilfs_btree_path *path, 905 struct nilfs_btree_path *path,
810 int level, __u64 *keyp, __u64 *ptrp) 906 int level, __u64 *keyp, __u64 *ptrp)
811{ 907{
812 struct nilfs_btree_node *root, *child; 908 struct nilfs_btree_node *root, *child;
813 int n; 909 int n, ncblk;
814 910
815 root = nilfs_btree_get_root(btree); 911 root = nilfs_btree_get_root(btree);
816 child = nilfs_btree_get_sib_node(path, level); 912 child = nilfs_btree_get_sib_node(path, level);
913 ncblk = nilfs_btree_nchildren_per_block(btree);
817 914
818 n = nilfs_btree_node_get_nchildren(root); 915 n = nilfs_btree_node_get_nchildren(root);
819 916
820 nilfs_btree_node_move_right(btree, root, child, n); 917 nilfs_btree_node_move_right(root, child, n,
918 NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
821 nilfs_btree_node_set_level(root, level + 1); 919 nilfs_btree_node_set_level(root, level + 1);
822 920
823 if (!buffer_dirty(path[level].bp_sib_bh)) 921 if (!buffer_dirty(path[level].bp_sib_bh))
@@ -832,11 +930,11 @@ static void nilfs_btree_grow(struct nilfs_btree *btree,
832 *ptrp = path[level].bp_newreq.bpr_ptr; 930 *ptrp = path[level].bp_newreq.bpr_ptr;
833} 931}
834 932
835static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree, 933static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
836 const struct nilfs_btree_path *path) 934 const struct nilfs_btree_path *path)
837{ 935{
838 struct nilfs_btree_node *node; 936 struct nilfs_btree_node *node;
839 int level; 937 int level, ncmax;
840 938
841 if (path == NULL) 939 if (path == NULL)
842 return NILFS_BMAP_INVALID_PTR; 940 return NILFS_BMAP_INVALID_PTR;
@@ -844,29 +942,30 @@ static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree,
844 /* left sibling */ 942 /* left sibling */
845 level = NILFS_BTREE_LEVEL_NODE_MIN; 943 level = NILFS_BTREE_LEVEL_NODE_MIN;
846 if (path[level].bp_index > 0) { 944 if (path[level].bp_index > 0) {
847 node = nilfs_btree_get_node(btree, path, level); 945 node = nilfs_btree_get_node(btree, path, level, &ncmax);
848 return nilfs_btree_node_get_ptr(btree, node, 946 return nilfs_btree_node_get_ptr(node,
849 path[level].bp_index - 1); 947 path[level].bp_index - 1,
948 ncmax);
850 } 949 }
851 950
852 /* parent */ 951 /* parent */
853 level = NILFS_BTREE_LEVEL_NODE_MIN + 1; 952 level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
854 if (level <= nilfs_btree_height(btree) - 1) { 953 if (level <= nilfs_btree_height(btree) - 1) {
855 node = nilfs_btree_get_node(btree, path, level); 954 node = nilfs_btree_get_node(btree, path, level, &ncmax);
856 return nilfs_btree_node_get_ptr(btree, node, 955 return nilfs_btree_node_get_ptr(node, path[level].bp_index,
857 path[level].bp_index); 956 ncmax);
858 } 957 }
859 958
860 return NILFS_BMAP_INVALID_PTR; 959 return NILFS_BMAP_INVALID_PTR;
861} 960}
862 961
863static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree, 962static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
864 const struct nilfs_btree_path *path, 963 const struct nilfs_btree_path *path,
865 __u64 key) 964 __u64 key)
866{ 965{
867 __u64 ptr; 966 __u64 ptr;
868 967
869 ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key); 968 ptr = nilfs_bmap_find_target_seq(btree, key);
870 if (ptr != NILFS_BMAP_INVALID_PTR) 969 if (ptr != NILFS_BMAP_INVALID_PTR)
871 /* sequential access */ 970 /* sequential access */
872 return ptr; 971 return ptr;
@@ -877,17 +976,10 @@ static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree,
877 return ptr; 976 return ptr;
878 } 977 }
879 /* block group */ 978 /* block group */
880 return nilfs_bmap_find_target_in_group(&btree->bt_bmap); 979 return nilfs_bmap_find_target_in_group(btree);
881}
882
883static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key,
884 __u64 ptr)
885{
886 btree->bt_bmap.b_last_allocated_key = key;
887 btree->bt_bmap.b_last_allocated_ptr = ptr;
888} 980}
889 981
890static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, 982static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
891 struct nilfs_btree_path *path, 983 struct nilfs_btree_path *path,
892 int *levelp, __u64 key, __u64 ptr, 984 int *levelp, __u64 key, __u64 ptr,
893 struct nilfs_bmap_stats *stats) 985 struct nilfs_bmap_stats *stats)
@@ -895,79 +987,78 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
895 struct buffer_head *bh; 987 struct buffer_head *bh;
896 struct nilfs_btree_node *node, *parent, *sib; 988 struct nilfs_btree_node *node, *parent, *sib;
897 __u64 sibptr; 989 __u64 sibptr;
898 int pindex, level, ret; 990 int pindex, level, ncmax, ncblk, ret;
899 struct inode *dat = NULL; 991 struct inode *dat = NULL;
900 992
901 stats->bs_nblocks = 0; 993 stats->bs_nblocks = 0;
902 level = NILFS_BTREE_LEVEL_DATA; 994 level = NILFS_BTREE_LEVEL_DATA;
903 995
904 /* allocate a new ptr for data block */ 996 /* allocate a new ptr for data block */
905 if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) { 997 if (NILFS_BMAP_USE_VBN(btree)) {
906 path[level].bp_newreq.bpr_ptr = 998 path[level].bp_newreq.bpr_ptr =
907 nilfs_btree_find_target_v(btree, path, key); 999 nilfs_btree_find_target_v(btree, path, key);
908 dat = nilfs_bmap_get_dat(&btree->bt_bmap); 1000 dat = nilfs_bmap_get_dat(btree);
909 } 1001 }
910 1002
911 ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap, 1003 ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
912 &path[level].bp_newreq, dat);
913 if (ret < 0) 1004 if (ret < 0)
914 goto err_out_data; 1005 goto err_out_data;
915 1006
1007 ncblk = nilfs_btree_nchildren_per_block(btree);
1008
916 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 1009 for (level = NILFS_BTREE_LEVEL_NODE_MIN;
917 level < nilfs_btree_height(btree) - 1; 1010 level < nilfs_btree_height(btree) - 1;
918 level++) { 1011 level++) {
919 node = nilfs_btree_get_nonroot_node(path, level); 1012 node = nilfs_btree_get_nonroot_node(path, level);
920 if (nilfs_btree_node_get_nchildren(node) < 1013 if (nilfs_btree_node_get_nchildren(node) < ncblk) {
921 nilfs_btree_node_nchildren_max(node, btree)) {
922 path[level].bp_op = nilfs_btree_do_insert; 1014 path[level].bp_op = nilfs_btree_do_insert;
923 stats->bs_nblocks++; 1015 stats->bs_nblocks++;
924 goto out; 1016 goto out;
925 } 1017 }
926 1018
927 parent = nilfs_btree_get_node(btree, path, level + 1); 1019 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
928 pindex = path[level + 1].bp_index; 1020 pindex = path[level + 1].bp_index;
929 1021
930 /* left sibling */ 1022 /* left sibling */
931 if (pindex > 0) { 1023 if (pindex > 0) {
932 sibptr = nilfs_btree_node_get_ptr(btree, parent, 1024 sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
933 pindex - 1); 1025 ncmax);
934 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1026 ret = nilfs_btree_get_block(btree, sibptr, &bh);
935 if (ret < 0) 1027 if (ret < 0)
936 goto err_out_child_node; 1028 goto err_out_child_node;
937 sib = (struct nilfs_btree_node *)bh->b_data; 1029 sib = (struct nilfs_btree_node *)bh->b_data;
938 if (nilfs_btree_node_get_nchildren(sib) < 1030 if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
939 nilfs_btree_node_nchildren_max(sib, btree)) {
940 path[level].bp_sib_bh = bh; 1031 path[level].bp_sib_bh = bh;
941 path[level].bp_op = nilfs_btree_carry_left; 1032 path[level].bp_op = nilfs_btree_carry_left;
942 stats->bs_nblocks++; 1033 stats->bs_nblocks++;
943 goto out; 1034 goto out;
944 } else 1035 } else {
945 brelse(bh); 1036 brelse(bh);
1037 }
946 } 1038 }
947 1039
948 /* right sibling */ 1040 /* right sibling */
949 if (pindex < 1041 if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
950 nilfs_btree_node_get_nchildren(parent) - 1) { 1042 sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
951 sibptr = nilfs_btree_node_get_ptr(btree, parent, 1043 ncmax);
952 pindex + 1);
953 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1044 ret = nilfs_btree_get_block(btree, sibptr, &bh);
954 if (ret < 0) 1045 if (ret < 0)
955 goto err_out_child_node; 1046 goto err_out_child_node;
956 sib = (struct nilfs_btree_node *)bh->b_data; 1047 sib = (struct nilfs_btree_node *)bh->b_data;
957 if (nilfs_btree_node_get_nchildren(sib) < 1048 if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
958 nilfs_btree_node_nchildren_max(sib, btree)) {
959 path[level].bp_sib_bh = bh; 1049 path[level].bp_sib_bh = bh;
960 path[level].bp_op = nilfs_btree_carry_right; 1050 path[level].bp_op = nilfs_btree_carry_right;
961 stats->bs_nblocks++; 1051 stats->bs_nblocks++;
962 goto out; 1052 goto out;
963 } else 1053 } else {
964 brelse(bh); 1054 brelse(bh);
1055 }
965 } 1056 }
966 1057
967 /* split */ 1058 /* split */
968 path[level].bp_newreq.bpr_ptr = 1059 path[level].bp_newreq.bpr_ptr =
969 path[level - 1].bp_newreq.bpr_ptr + 1; 1060 path[level - 1].bp_newreq.bpr_ptr + 1;
970 ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap, 1061 ret = nilfs_bmap_prepare_alloc_ptr(btree,
971 &path[level].bp_newreq, dat); 1062 &path[level].bp_newreq, dat);
972 if (ret < 0) 1063 if (ret < 0)
973 goto err_out_child_node; 1064 goto err_out_child_node;
@@ -979,9 +1070,8 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
979 1070
980 stats->bs_nblocks++; 1071 stats->bs_nblocks++;
981 1072
982 nilfs_btree_node_init(btree, 1073 sib = (struct nilfs_btree_node *)bh->b_data;
983 (struct nilfs_btree_node *)bh->b_data, 1074 nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
984 0, level, 0, NULL, NULL);
985 path[level].bp_sib_bh = bh; 1075 path[level].bp_sib_bh = bh;
986 path[level].bp_op = nilfs_btree_split; 1076 path[level].bp_op = nilfs_btree_split;
987 } 1077 }
@@ -989,7 +1079,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
989 /* root */ 1079 /* root */
990 node = nilfs_btree_get_root(btree); 1080 node = nilfs_btree_get_root(btree);
991 if (nilfs_btree_node_get_nchildren(node) < 1081 if (nilfs_btree_node_get_nchildren(node) <
992 nilfs_btree_node_nchildren_max(node, btree)) { 1082 NILFS_BTREE_ROOT_NCHILDREN_MAX) {
993 path[level].bp_op = nilfs_btree_do_insert; 1083 path[level].bp_op = nilfs_btree_do_insert;
994 stats->bs_nblocks++; 1084 stats->bs_nblocks++;
995 goto out; 1085 goto out;
@@ -997,8 +1087,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
997 1087
998 /* grow */ 1088 /* grow */
999 path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; 1089 path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1000 ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap, 1090 ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1001 &path[level].bp_newreq, dat);
1002 if (ret < 0) 1091 if (ret < 0)
1003 goto err_out_child_node; 1092 goto err_out_child_node;
1004 ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr, 1093 ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
@@ -1006,8 +1095,8 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
1006 if (ret < 0) 1095 if (ret < 0)
1007 goto err_out_curr_node; 1096 goto err_out_curr_node;
1008 1097
1009 nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data, 1098 nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
1010 0, level, 0, NULL, NULL); 1099 0, level, 0, ncblk, NULL, NULL);
1011 path[level].bp_sib_bh = bh; 1100 path[level].bp_sib_bh = bh;
1012 path[level].bp_op = nilfs_btree_grow; 1101 path[level].bp_op = nilfs_btree_grow;
1013 1102
@@ -1024,25 +1113,22 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
1024 1113
1025 /* error */ 1114 /* error */
1026 err_out_curr_node: 1115 err_out_curr_node:
1027 nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq, 1116 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1028 dat);
1029 err_out_child_node: 1117 err_out_child_node:
1030 for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) { 1118 for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1031 nilfs_btnode_delete(path[level].bp_sib_bh); 1119 nilfs_btnode_delete(path[level].bp_sib_bh);
1032 nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, 1120 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1033 &path[level].bp_newreq, dat);
1034 1121
1035 } 1122 }
1036 1123
1037 nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq, 1124 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1038 dat);
1039 err_out_data: 1125 err_out_data:
1040 *levelp = level; 1126 *levelp = level;
1041 stats->bs_nblocks = 0; 1127 stats->bs_nblocks = 0;
1042 return ret; 1128 return ret;
1043} 1129}
1044 1130
1045static void nilfs_btree_commit_insert(struct nilfs_btree *btree, 1131static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
1046 struct nilfs_btree_path *path, 1132 struct nilfs_btree_path *path,
1047 int maxlevel, __u64 key, __u64 ptr) 1133 int maxlevel, __u64 key, __u64 ptr)
1048{ 1134{
@@ -1051,35 +1137,33 @@ static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
1051 1137
1052 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); 1138 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1053 ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr; 1139 ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1054 if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) { 1140 if (NILFS_BMAP_USE_VBN(btree)) {
1055 nilfs_btree_set_target_v(btree, key, ptr); 1141 nilfs_bmap_set_target_v(btree, key, ptr);
1056 dat = nilfs_bmap_get_dat(&btree->bt_bmap); 1142 dat = nilfs_bmap_get_dat(btree);
1057 } 1143 }
1058 1144
1059 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { 1145 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1060 nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap, 1146 nilfs_bmap_commit_alloc_ptr(btree,
1061 &path[level - 1].bp_newreq, dat); 1147 &path[level - 1].bp_newreq, dat);
1062 path[level].bp_op(btree, path, level, &key, &ptr); 1148 path[level].bp_op(btree, path, level, &key, &ptr);
1063 } 1149 }
1064 1150
1065 if (!nilfs_bmap_dirty(&btree->bt_bmap)) 1151 if (!nilfs_bmap_dirty(btree))
1066 nilfs_bmap_set_dirty(&btree->bt_bmap); 1152 nilfs_bmap_set_dirty(btree);
1067} 1153}
1068 1154
1069static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) 1155static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1070{ 1156{
1071 struct nilfs_btree *btree;
1072 struct nilfs_btree_path *path; 1157 struct nilfs_btree_path *path;
1073 struct nilfs_bmap_stats stats; 1158 struct nilfs_bmap_stats stats;
1074 int level, ret; 1159 int level, ret;
1075 1160
1076 btree = (struct nilfs_btree *)bmap;
1077 path = nilfs_btree_alloc_path(); 1161 path = nilfs_btree_alloc_path();
1078 if (path == NULL) 1162 if (path == NULL)
1079 return -ENOMEM; 1163 return -ENOMEM;
1080 1164
1081 ret = nilfs_btree_do_lookup(btree, path, key, NULL, 1165 ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1082 NILFS_BTREE_LEVEL_NODE_MIN); 1166 NILFS_BTREE_LEVEL_NODE_MIN, 0);
1083 if (ret != -ENOENT) { 1167 if (ret != -ENOENT) {
1084 if (ret == 0) 1168 if (ret == 0)
1085 ret = -EEXIST; 1169 ret = -EEXIST;
@@ -1090,23 +1174,25 @@ static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1090 if (ret < 0) 1174 if (ret < 0)
1091 goto out; 1175 goto out;
1092 nilfs_btree_commit_insert(btree, path, level, key, ptr); 1176 nilfs_btree_commit_insert(btree, path, level, key, ptr);
1093 nilfs_bmap_add_blocks(bmap, stats.bs_nblocks); 1177 nilfs_bmap_add_blocks(btree, stats.bs_nblocks);
1094 1178
1095 out: 1179 out:
1096 nilfs_btree_free_path(path); 1180 nilfs_btree_free_path(path);
1097 return ret; 1181 return ret;
1098} 1182}
1099 1183
1100static void nilfs_btree_do_delete(struct nilfs_btree *btree, 1184static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1101 struct nilfs_btree_path *path, 1185 struct nilfs_btree_path *path,
1102 int level, __u64 *keyp, __u64 *ptrp) 1186 int level, __u64 *keyp, __u64 *ptrp)
1103{ 1187{
1104 struct nilfs_btree_node *node; 1188 struct nilfs_btree_node *node;
1189 int ncblk;
1105 1190
1106 if (level < nilfs_btree_height(btree) - 1) { 1191 if (level < nilfs_btree_height(btree) - 1) {
1107 node = nilfs_btree_get_nonroot_node(path, level); 1192 node = nilfs_btree_get_nonroot_node(path, level);
1108 nilfs_btree_node_delete(btree, node, keyp, ptrp, 1193 ncblk = nilfs_btree_nchildren_per_block(btree);
1109 path[level].bp_index); 1194 nilfs_btree_node_delete(node, path[level].bp_index,
1195 keyp, ptrp, ncblk);
1110 if (!buffer_dirty(path[level].bp_bh)) 1196 if (!buffer_dirty(path[level].bp_bh))
1111 nilfs_btnode_mark_dirty(path[level].bp_bh); 1197 nilfs_btnode_mark_dirty(path[level].bp_bh);
1112 if (path[level].bp_index == 0) 1198 if (path[level].bp_index == 0)
@@ -1114,17 +1200,18 @@ static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1114 nilfs_btree_node_get_key(node, 0)); 1200 nilfs_btree_node_get_key(node, 0));
1115 } else { 1201 } else {
1116 node = nilfs_btree_get_root(btree); 1202 node = nilfs_btree_get_root(btree);
1117 nilfs_btree_node_delete(btree, node, keyp, ptrp, 1203 nilfs_btree_node_delete(node, path[level].bp_index,
1118 path[level].bp_index); 1204 keyp, ptrp,
1205 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1119 } 1206 }
1120} 1207}
1121 1208
1122static void nilfs_btree_borrow_left(struct nilfs_btree *btree, 1209static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1123 struct nilfs_btree_path *path, 1210 struct nilfs_btree_path *path,
1124 int level, __u64 *keyp, __u64 *ptrp) 1211 int level, __u64 *keyp, __u64 *ptrp)
1125{ 1212{
1126 struct nilfs_btree_node *node, *left; 1213 struct nilfs_btree_node *node, *left;
1127 int nchildren, lnchildren, n; 1214 int nchildren, lnchildren, n, ncblk;
1128 1215
1129 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1216 nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1130 1217
@@ -1132,10 +1219,11 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1132 left = nilfs_btree_get_sib_node(path, level); 1219 left = nilfs_btree_get_sib_node(path, level);
1133 nchildren = nilfs_btree_node_get_nchildren(node); 1220 nchildren = nilfs_btree_node_get_nchildren(node);
1134 lnchildren = nilfs_btree_node_get_nchildren(left); 1221 lnchildren = nilfs_btree_node_get_nchildren(left);
1222 ncblk = nilfs_btree_nchildren_per_block(btree);
1135 1223
1136 n = (nchildren + lnchildren) / 2 - nchildren; 1224 n = (nchildren + lnchildren) / 2 - nchildren;
1137 1225
1138 nilfs_btree_node_move_right(btree, left, node, n); 1226 nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1139 1227
1140 if (!buffer_dirty(path[level].bp_bh)) 1228 if (!buffer_dirty(path[level].bp_bh))
1141 nilfs_btnode_mark_dirty(path[level].bp_bh); 1229 nilfs_btnode_mark_dirty(path[level].bp_bh);
@@ -1150,12 +1238,12 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1150 path[level].bp_index += n; 1238 path[level].bp_index += n;
1151} 1239}
1152 1240
1153static void nilfs_btree_borrow_right(struct nilfs_btree *btree, 1241static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1154 struct nilfs_btree_path *path, 1242 struct nilfs_btree_path *path,
1155 int level, __u64 *keyp, __u64 *ptrp) 1243 int level, __u64 *keyp, __u64 *ptrp)
1156{ 1244{
1157 struct nilfs_btree_node *node, *right; 1245 struct nilfs_btree_node *node, *right;
1158 int nchildren, rnchildren, n; 1246 int nchildren, rnchildren, n, ncblk;
1159 1247
1160 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1248 nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1161 1249
@@ -1163,10 +1251,11 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1163 right = nilfs_btree_get_sib_node(path, level); 1251 right = nilfs_btree_get_sib_node(path, level);
1164 nchildren = nilfs_btree_node_get_nchildren(node); 1252 nchildren = nilfs_btree_node_get_nchildren(node);
1165 rnchildren = nilfs_btree_node_get_nchildren(right); 1253 rnchildren = nilfs_btree_node_get_nchildren(right);
1254 ncblk = nilfs_btree_nchildren_per_block(btree);
1166 1255
1167 n = (nchildren + rnchildren) / 2 - nchildren; 1256 n = (nchildren + rnchildren) / 2 - nchildren;
1168 1257
1169 nilfs_btree_node_move_left(btree, node, right, n); 1258 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1170 1259
1171 if (!buffer_dirty(path[level].bp_bh)) 1260 if (!buffer_dirty(path[level].bp_bh))
1172 nilfs_btnode_mark_dirty(path[level].bp_bh); 1261 nilfs_btnode_mark_dirty(path[level].bp_bh);
@@ -1182,21 +1271,22 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1182 path[level].bp_sib_bh = NULL; 1271 path[level].bp_sib_bh = NULL;
1183} 1272}
1184 1273
1185static void nilfs_btree_concat_left(struct nilfs_btree *btree, 1274static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1186 struct nilfs_btree_path *path, 1275 struct nilfs_btree_path *path,
1187 int level, __u64 *keyp, __u64 *ptrp) 1276 int level, __u64 *keyp, __u64 *ptrp)
1188{ 1277{
1189 struct nilfs_btree_node *node, *left; 1278 struct nilfs_btree_node *node, *left;
1190 int n; 1279 int n, ncblk;
1191 1280
1192 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1281 nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1193 1282
1194 node = nilfs_btree_get_nonroot_node(path, level); 1283 node = nilfs_btree_get_nonroot_node(path, level);
1195 left = nilfs_btree_get_sib_node(path, level); 1284 left = nilfs_btree_get_sib_node(path, level);
1285 ncblk = nilfs_btree_nchildren_per_block(btree);
1196 1286
1197 n = nilfs_btree_node_get_nchildren(node); 1287 n = nilfs_btree_node_get_nchildren(node);
1198 1288
1199 nilfs_btree_node_move_left(btree, left, node, n); 1289 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1200 1290
1201 if (!buffer_dirty(path[level].bp_sib_bh)) 1291 if (!buffer_dirty(path[level].bp_sib_bh))
1202 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 1292 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
@@ -1207,21 +1297,22 @@ static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1207 path[level].bp_index += nilfs_btree_node_get_nchildren(left); 1297 path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1208} 1298}
1209 1299
1210static void nilfs_btree_concat_right(struct nilfs_btree *btree, 1300static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1211 struct nilfs_btree_path *path, 1301 struct nilfs_btree_path *path,
1212 int level, __u64 *keyp, __u64 *ptrp) 1302 int level, __u64 *keyp, __u64 *ptrp)
1213{ 1303{
1214 struct nilfs_btree_node *node, *right; 1304 struct nilfs_btree_node *node, *right;
1215 int n; 1305 int n, ncblk;
1216 1306
1217 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1307 nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1218 1308
1219 node = nilfs_btree_get_nonroot_node(path, level); 1309 node = nilfs_btree_get_nonroot_node(path, level);
1220 right = nilfs_btree_get_sib_node(path, level); 1310 right = nilfs_btree_get_sib_node(path, level);
1311 ncblk = nilfs_btree_nchildren_per_block(btree);
1221 1312
1222 n = nilfs_btree_node_get_nchildren(right); 1313 n = nilfs_btree_node_get_nchildren(right);
1223 1314
1224 nilfs_btree_node_move_left(btree, node, right, n); 1315 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1225 1316
1226 if (!buffer_dirty(path[level].bp_bh)) 1317 if (!buffer_dirty(path[level].bp_bh))
1227 nilfs_btnode_mark_dirty(path[level].bp_bh); 1318 nilfs_btnode_mark_dirty(path[level].bp_bh);
@@ -1231,29 +1322,32 @@ static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1231 path[level + 1].bp_index++; 1322 path[level + 1].bp_index++;
1232} 1323}
1233 1324
1234static void nilfs_btree_shrink(struct nilfs_btree *btree, 1325static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1235 struct nilfs_btree_path *path, 1326 struct nilfs_btree_path *path,
1236 int level, __u64 *keyp, __u64 *ptrp) 1327 int level, __u64 *keyp, __u64 *ptrp)
1237{ 1328{
1238 struct nilfs_btree_node *root, *child; 1329 struct nilfs_btree_node *root, *child;
1239 int n; 1330 int n, ncblk;
1240 1331
1241 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1332 nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1242 1333
1243 root = nilfs_btree_get_root(btree); 1334 root = nilfs_btree_get_root(btree);
1244 child = nilfs_btree_get_nonroot_node(path, level); 1335 child = nilfs_btree_get_nonroot_node(path, level);
1336 ncblk = nilfs_btree_nchildren_per_block(btree);
1245 1337
1246 nilfs_btree_node_delete(btree, root, NULL, NULL, 0); 1338 nilfs_btree_node_delete(root, 0, NULL, NULL,
1339 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1247 nilfs_btree_node_set_level(root, level); 1340 nilfs_btree_node_set_level(root, level);
1248 n = nilfs_btree_node_get_nchildren(child); 1341 n = nilfs_btree_node_get_nchildren(child);
1249 nilfs_btree_node_move_left(btree, root, child, n); 1342 nilfs_btree_node_move_left(root, child, n,
1343 NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
1250 1344
1251 nilfs_btnode_delete(path[level].bp_bh); 1345 nilfs_btnode_delete(path[level].bp_bh);
1252 path[level].bp_bh = NULL; 1346 path[level].bp_bh = NULL;
1253} 1347}
1254 1348
1255 1349
1256static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, 1350static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1257 struct nilfs_btree_path *path, 1351 struct nilfs_btree_path *path,
1258 int *levelp, 1352 int *levelp,
1259 struct nilfs_bmap_stats *stats, 1353 struct nilfs_bmap_stats *stats,
@@ -1262,42 +1356,43 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1262 struct buffer_head *bh; 1356 struct buffer_head *bh;
1263 struct nilfs_btree_node *node, *parent, *sib; 1357 struct nilfs_btree_node *node, *parent, *sib;
1264 __u64 sibptr; 1358 __u64 sibptr;
1265 int pindex, level, ret; 1359 int pindex, level, ncmin, ncmax, ncblk, ret;
1266 1360
1267 ret = 0; 1361 ret = 0;
1268 stats->bs_nblocks = 0; 1362 stats->bs_nblocks = 0;
1363 ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1364 ncblk = nilfs_btree_nchildren_per_block(btree);
1365
1269 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 1366 for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1270 level < nilfs_btree_height(btree) - 1; 1367 level < nilfs_btree_height(btree) - 1;
1271 level++) { 1368 level++) {
1272 node = nilfs_btree_get_nonroot_node(path, level); 1369 node = nilfs_btree_get_nonroot_node(path, level);
1273 path[level].bp_oldreq.bpr_ptr = 1370 path[level].bp_oldreq.bpr_ptr =
1274 nilfs_btree_node_get_ptr(btree, node, 1371 nilfs_btree_node_get_ptr(node, path[level].bp_index,
1275 path[level].bp_index); 1372 ncblk);
1276 ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap, 1373 ret = nilfs_bmap_prepare_end_ptr(btree,
1277 &path[level].bp_oldreq, dat); 1374 &path[level].bp_oldreq, dat);
1278 if (ret < 0) 1375 if (ret < 0)
1279 goto err_out_child_node; 1376 goto err_out_child_node;
1280 1377
1281 if (nilfs_btree_node_get_nchildren(node) > 1378 if (nilfs_btree_node_get_nchildren(node) > ncmin) {
1282 nilfs_btree_node_nchildren_min(node, btree)) {
1283 path[level].bp_op = nilfs_btree_do_delete; 1379 path[level].bp_op = nilfs_btree_do_delete;
1284 stats->bs_nblocks++; 1380 stats->bs_nblocks++;
1285 goto out; 1381 goto out;
1286 } 1382 }
1287 1383
1288 parent = nilfs_btree_get_node(btree, path, level + 1); 1384 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1289 pindex = path[level + 1].bp_index; 1385 pindex = path[level + 1].bp_index;
1290 1386
1291 if (pindex > 0) { 1387 if (pindex > 0) {
1292 /* left sibling */ 1388 /* left sibling */
1293 sibptr = nilfs_btree_node_get_ptr(btree, parent, 1389 sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1294 pindex - 1); 1390 ncmax);
1295 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1391 ret = nilfs_btree_get_block(btree, sibptr, &bh);
1296 if (ret < 0) 1392 if (ret < 0)
1297 goto err_out_curr_node; 1393 goto err_out_curr_node;
1298 sib = (struct nilfs_btree_node *)bh->b_data; 1394 sib = (struct nilfs_btree_node *)bh->b_data;
1299 if (nilfs_btree_node_get_nchildren(sib) > 1395 if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1300 nilfs_btree_node_nchildren_min(sib, btree)) {
1301 path[level].bp_sib_bh = bh; 1396 path[level].bp_sib_bh = bh;
1302 path[level].bp_op = nilfs_btree_borrow_left; 1397 path[level].bp_op = nilfs_btree_borrow_left;
1303 stats->bs_nblocks++; 1398 stats->bs_nblocks++;
@@ -1311,14 +1406,13 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1311 } else if (pindex < 1406 } else if (pindex <
1312 nilfs_btree_node_get_nchildren(parent) - 1) { 1407 nilfs_btree_node_get_nchildren(parent) - 1) {
1313 /* right sibling */ 1408 /* right sibling */
1314 sibptr = nilfs_btree_node_get_ptr(btree, parent, 1409 sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1315 pindex + 1); 1410 ncmax);
1316 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1411 ret = nilfs_btree_get_block(btree, sibptr, &bh);
1317 if (ret < 0) 1412 if (ret < 0)
1318 goto err_out_curr_node; 1413 goto err_out_curr_node;
1319 sib = (struct nilfs_btree_node *)bh->b_data; 1414 sib = (struct nilfs_btree_node *)bh->b_data;
1320 if (nilfs_btree_node_get_nchildren(sib) > 1415 if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1321 nilfs_btree_node_nchildren_min(sib, btree)) {
1322 path[level].bp_sib_bh = bh; 1416 path[level].bp_sib_bh = bh;
1323 path[level].bp_op = nilfs_btree_borrow_right; 1417 path[level].bp_op = nilfs_btree_borrow_right;
1324 stats->bs_nblocks++; 1418 stats->bs_nblocks++;
@@ -1349,10 +1443,10 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1349 1443
1350 node = nilfs_btree_get_root(btree); 1444 node = nilfs_btree_get_root(btree);
1351 path[level].bp_oldreq.bpr_ptr = 1445 path[level].bp_oldreq.bpr_ptr =
1352 nilfs_btree_node_get_ptr(btree, node, path[level].bp_index); 1446 nilfs_btree_node_get_ptr(node, path[level].bp_index,
1447 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1353 1448
1354 ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap, 1449 ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1355 &path[level].bp_oldreq, dat);
1356 if (ret < 0) 1450 if (ret < 0)
1357 goto err_out_child_node; 1451 goto err_out_child_node;
1358 1452
@@ -1367,75 +1461,68 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1367 1461
1368 /* error */ 1462 /* error */
1369 err_out_curr_node: 1463 err_out_curr_node:
1370 nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq, dat); 1464 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1371 err_out_child_node: 1465 err_out_child_node:
1372 for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) { 1466 for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1373 brelse(path[level].bp_sib_bh); 1467 brelse(path[level].bp_sib_bh);
1374 nilfs_bmap_abort_end_ptr(&btree->bt_bmap, 1468 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1375 &path[level].bp_oldreq, dat);
1376 } 1469 }
1377 *levelp = level; 1470 *levelp = level;
1378 stats->bs_nblocks = 0; 1471 stats->bs_nblocks = 0;
1379 return ret; 1472 return ret;
1380} 1473}
1381 1474
1382static void nilfs_btree_commit_delete(struct nilfs_btree *btree, 1475static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
1383 struct nilfs_btree_path *path, 1476 struct nilfs_btree_path *path,
1384 int maxlevel, struct inode *dat) 1477 int maxlevel, struct inode *dat)
1385{ 1478{
1386 int level; 1479 int level;
1387 1480
1388 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { 1481 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1389 nilfs_bmap_commit_end_ptr(&btree->bt_bmap, 1482 nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
1390 &path[level].bp_oldreq, dat);
1391 path[level].bp_op(btree, path, level, NULL, NULL); 1483 path[level].bp_op(btree, path, level, NULL, NULL);
1392 } 1484 }
1393 1485
1394 if (!nilfs_bmap_dirty(&btree->bt_bmap)) 1486 if (!nilfs_bmap_dirty(btree))
1395 nilfs_bmap_set_dirty(&btree->bt_bmap); 1487 nilfs_bmap_set_dirty(btree);
1396} 1488}
1397 1489
1398static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key) 1490static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1399 1491
1400{ 1492{
1401 struct nilfs_btree *btree;
1402 struct nilfs_btree_path *path; 1493 struct nilfs_btree_path *path;
1403 struct nilfs_bmap_stats stats; 1494 struct nilfs_bmap_stats stats;
1404 struct inode *dat; 1495 struct inode *dat;
1405 int level, ret; 1496 int level, ret;
1406 1497
1407 btree = (struct nilfs_btree *)bmap;
1408 path = nilfs_btree_alloc_path(); 1498 path = nilfs_btree_alloc_path();
1409 if (path == NULL) 1499 if (path == NULL)
1410 return -ENOMEM; 1500 return -ENOMEM;
1411 1501
1412 ret = nilfs_btree_do_lookup(btree, path, key, NULL, 1502 ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1413 NILFS_BTREE_LEVEL_NODE_MIN); 1503 NILFS_BTREE_LEVEL_NODE_MIN, 0);
1414 if (ret < 0) 1504 if (ret < 0)
1415 goto out; 1505 goto out;
1416 1506
1417 1507
1418 dat = NILFS_BMAP_USE_VBN(&btree->bt_bmap) ? 1508 dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1419 nilfs_bmap_get_dat(&btree->bt_bmap) : NULL;
1420 1509
1421 ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat); 1510 ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1422 if (ret < 0) 1511 if (ret < 0)
1423 goto out; 1512 goto out;
1424 nilfs_btree_commit_delete(btree, path, level, dat); 1513 nilfs_btree_commit_delete(btree, path, level, dat);
1425 nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks); 1514 nilfs_bmap_sub_blocks(btree, stats.bs_nblocks);
1426 1515
1427out: 1516out:
1428 nilfs_btree_free_path(path); 1517 nilfs_btree_free_path(path);
1429 return ret; 1518 return ret;
1430} 1519}
1431 1520
1432static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp) 1521static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1433{ 1522{
1434 struct nilfs_btree *btree;
1435 struct nilfs_btree_path *path; 1523 struct nilfs_btree_path *path;
1436 int ret; 1524 int ret;
1437 1525
1438 btree = (struct nilfs_btree *)bmap;
1439 path = nilfs_btree_alloc_path(); 1526 path = nilfs_btree_alloc_path();
1440 if (path == NULL) 1527 if (path == NULL)
1441 return -ENOMEM; 1528 return -ENOMEM;
@@ -1447,16 +1534,14 @@ static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
1447 return ret; 1534 return ret;
1448} 1535}
1449 1536
1450static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key) 1537static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
1451{ 1538{
1452 struct buffer_head *bh; 1539 struct buffer_head *bh;
1453 struct nilfs_btree *btree;
1454 struct nilfs_btree_node *root, *node; 1540 struct nilfs_btree_node *root, *node;
1455 __u64 maxkey, nextmaxkey; 1541 __u64 maxkey, nextmaxkey;
1456 __u64 ptr; 1542 __u64 ptr;
1457 int nchildren, ret; 1543 int nchildren, ret;
1458 1544
1459 btree = (struct nilfs_btree *)bmap;
1460 root = nilfs_btree_get_root(btree); 1545 root = nilfs_btree_get_root(btree);
1461 switch (nilfs_btree_height(btree)) { 1546 switch (nilfs_btree_height(btree)) {
1462 case 2: 1547 case 2:
@@ -1467,7 +1552,8 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1467 nchildren = nilfs_btree_node_get_nchildren(root); 1552 nchildren = nilfs_btree_node_get_nchildren(root);
1468 if (nchildren > 1) 1553 if (nchildren > 1)
1469 return 0; 1554 return 0;
1470 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1); 1555 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1556 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1471 ret = nilfs_btree_get_block(btree, ptr, &bh); 1557 ret = nilfs_btree_get_block(btree, ptr, &bh);
1472 if (ret < 0) 1558 if (ret < 0)
1473 return ret; 1559 return ret;
@@ -1487,32 +1573,33 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1487 return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW); 1573 return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1488} 1574}
1489 1575
1490static int nilfs_btree_gather_data(struct nilfs_bmap *bmap, 1576static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
1491 __u64 *keys, __u64 *ptrs, int nitems) 1577 __u64 *keys, __u64 *ptrs, int nitems)
1492{ 1578{
1493 struct buffer_head *bh; 1579 struct buffer_head *bh;
1494 struct nilfs_btree *btree;
1495 struct nilfs_btree_node *node, *root; 1580 struct nilfs_btree_node *node, *root;
1496 __le64 *dkeys; 1581 __le64 *dkeys;
1497 __le64 *dptrs; 1582 __le64 *dptrs;
1498 __u64 ptr; 1583 __u64 ptr;
1499 int nchildren, i, ret; 1584 int nchildren, ncmax, i, ret;
1500 1585
1501 btree = (struct nilfs_btree *)bmap;
1502 root = nilfs_btree_get_root(btree); 1586 root = nilfs_btree_get_root(btree);
1503 switch (nilfs_btree_height(btree)) { 1587 switch (nilfs_btree_height(btree)) {
1504 case 2: 1588 case 2:
1505 bh = NULL; 1589 bh = NULL;
1506 node = root; 1590 node = root;
1591 ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX;
1507 break; 1592 break;
1508 case 3: 1593 case 3:
1509 nchildren = nilfs_btree_node_get_nchildren(root); 1594 nchildren = nilfs_btree_node_get_nchildren(root);
1510 WARN_ON(nchildren > 1); 1595 WARN_ON(nchildren > 1);
1511 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1); 1596 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1597 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1512 ret = nilfs_btree_get_block(btree, ptr, &bh); 1598 ret = nilfs_btree_get_block(btree, ptr, &bh);
1513 if (ret < 0) 1599 if (ret < 0)
1514 return ret; 1600 return ret;
1515 node = (struct nilfs_btree_node *)bh->b_data; 1601 node = (struct nilfs_btree_node *)bh->b_data;
1602 ncmax = nilfs_btree_nchildren_per_block(btree);
1516 break; 1603 break;
1517 default: 1604 default:
1518 node = NULL; 1605 node = NULL;
@@ -1523,10 +1610,10 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1523 if (nchildren < nitems) 1610 if (nchildren < nitems)
1524 nitems = nchildren; 1611 nitems = nchildren;
1525 dkeys = nilfs_btree_node_dkeys(node); 1612 dkeys = nilfs_btree_node_dkeys(node);
1526 dptrs = nilfs_btree_node_dptrs(node, btree); 1613 dptrs = nilfs_btree_node_dptrs(node, ncmax);
1527 for (i = 0; i < nitems; i++) { 1614 for (i = 0; i < nitems; i++) {
1528 keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]); 1615 keys[i] = le64_to_cpu(dkeys[i]);
1529 ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]); 1616 ptrs[i] = le64_to_cpu(dptrs[i]);
1530 } 1617 }
1531 1618
1532 if (bh != NULL) 1619 if (bh != NULL)
@@ -1536,14 +1623,13 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1536} 1623}
1537 1624
1538static int 1625static int
1539nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key, 1626nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
1540 union nilfs_bmap_ptr_req *dreq, 1627 union nilfs_bmap_ptr_req *dreq,
1541 union nilfs_bmap_ptr_req *nreq, 1628 union nilfs_bmap_ptr_req *nreq,
1542 struct buffer_head **bhp, 1629 struct buffer_head **bhp,
1543 struct nilfs_bmap_stats *stats) 1630 struct nilfs_bmap_stats *stats)
1544{ 1631{
1545 struct buffer_head *bh; 1632 struct buffer_head *bh;
1546 struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1547 struct inode *dat = NULL; 1633 struct inode *dat = NULL;
1548 int ret; 1634 int ret;
1549 1635
@@ -1551,12 +1637,12 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1551 1637
1552 /* for data */ 1638 /* for data */
1553 /* cannot find near ptr */ 1639 /* cannot find near ptr */
1554 if (NILFS_BMAP_USE_VBN(bmap)) { 1640 if (NILFS_BMAP_USE_VBN(btree)) {
1555 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key); 1641 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1556 dat = nilfs_bmap_get_dat(bmap); 1642 dat = nilfs_bmap_get_dat(btree);
1557 } 1643 }
1558 1644
1559 ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq, dat); 1645 ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
1560 if (ret < 0) 1646 if (ret < 0)
1561 return ret; 1647 return ret;
1562 1648
@@ -1564,7 +1650,7 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1564 stats->bs_nblocks++; 1650 stats->bs_nblocks++;
1565 if (nreq != NULL) { 1651 if (nreq != NULL) {
1566 nreq->bpr_ptr = dreq->bpr_ptr + 1; 1652 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1567 ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq, dat); 1653 ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
1568 if (ret < 0) 1654 if (ret < 0)
1569 goto err_out_dreq; 1655 goto err_out_dreq;
1570 1656
@@ -1581,16 +1667,16 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1581 1667
1582 /* error */ 1668 /* error */
1583 err_out_nreq: 1669 err_out_nreq:
1584 nilfs_bmap_abort_alloc_ptr(bmap, nreq, dat); 1670 nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
1585 err_out_dreq: 1671 err_out_dreq:
1586 nilfs_bmap_abort_alloc_ptr(bmap, dreq, dat); 1672 nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
1587 stats->bs_nblocks = 0; 1673 stats->bs_nblocks = 0;
1588 return ret; 1674 return ret;
1589 1675
1590} 1676}
1591 1677
1592static void 1678static void
1593nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, 1679nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1594 __u64 key, __u64 ptr, 1680 __u64 key, __u64 ptr,
1595 const __u64 *keys, const __u64 *ptrs, 1681 const __u64 *keys, const __u64 *ptrs,
1596 int n, 1682 int n,
@@ -1598,57 +1684,59 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1598 union nilfs_bmap_ptr_req *nreq, 1684 union nilfs_bmap_ptr_req *nreq,
1599 struct buffer_head *bh) 1685 struct buffer_head *bh)
1600{ 1686{
1601 struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1602 struct nilfs_btree_node *node; 1687 struct nilfs_btree_node *node;
1603 struct inode *dat; 1688 struct inode *dat;
1604 __u64 tmpptr; 1689 __u64 tmpptr;
1690 int ncblk;
1605 1691
1606 /* free resources */ 1692 /* free resources */
1607 if (bmap->b_ops->bop_clear != NULL) 1693 if (btree->b_ops->bop_clear != NULL)
1608 bmap->b_ops->bop_clear(bmap); 1694 btree->b_ops->bop_clear(btree);
1609 1695
1610 /* ptr must be a pointer to a buffer head. */ 1696 /* ptr must be a pointer to a buffer head. */
1611 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); 1697 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1612 1698
1613 /* convert and insert */ 1699 /* convert and insert */
1614 dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL; 1700 dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1615 nilfs_btree_init(bmap); 1701 nilfs_btree_init(btree);
1616 if (nreq != NULL) { 1702 if (nreq != NULL) {
1617 nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat); 1703 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1618 nilfs_bmap_commit_alloc_ptr(bmap, nreq, dat); 1704 nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
1619 1705
1620 /* create child node at level 1 */ 1706 /* create child node at level 1 */
1621 node = (struct nilfs_btree_node *)bh->b_data; 1707 node = (struct nilfs_btree_node *)bh->b_data;
1622 nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs); 1708 ncblk = nilfs_btree_nchildren_per_block(btree);
1623 nilfs_btree_node_insert(btree, node, 1709 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1624 key, dreq->bpr_ptr, n); 1710 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1625 if (!buffer_dirty(bh)) 1711 if (!buffer_dirty(bh))
1626 nilfs_btnode_mark_dirty(bh); 1712 nilfs_btnode_mark_dirty(bh);
1627 if (!nilfs_bmap_dirty(bmap)) 1713 if (!nilfs_bmap_dirty(btree))
1628 nilfs_bmap_set_dirty(bmap); 1714 nilfs_bmap_set_dirty(btree);
1629 1715
1630 brelse(bh); 1716 brelse(bh);
1631 1717
1632 /* create root node at level 2 */ 1718 /* create root node at level 2 */
1633 node = nilfs_btree_get_root(btree); 1719 node = nilfs_btree_get_root(btree);
1634 tmpptr = nreq->bpr_ptr; 1720 tmpptr = nreq->bpr_ptr;
1635 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT, 1721 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
1636 2, 1, &keys[0], &tmpptr); 1722 NILFS_BTREE_ROOT_NCHILDREN_MAX,
1723 &keys[0], &tmpptr);
1637 } else { 1724 } else {
1638 nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat); 1725 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1639 1726
1640 /* create root node at level 1 */ 1727 /* create root node at level 1 */
1641 node = nilfs_btree_get_root(btree); 1728 node = nilfs_btree_get_root(btree);
1642 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT, 1729 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
1643 1, n, keys, ptrs); 1730 NILFS_BTREE_ROOT_NCHILDREN_MAX,
1644 nilfs_btree_node_insert(btree, node, 1731 keys, ptrs);
1645 key, dreq->bpr_ptr, n); 1732 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
1646 if (!nilfs_bmap_dirty(bmap)) 1733 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1647 nilfs_bmap_set_dirty(bmap); 1734 if (!nilfs_bmap_dirty(btree))
1735 nilfs_bmap_set_dirty(btree);
1648 } 1736 }
1649 1737
1650 if (NILFS_BMAP_USE_VBN(bmap)) 1738 if (NILFS_BMAP_USE_VBN(btree))
1651 nilfs_btree_set_target_v(btree, key, dreq->bpr_ptr); 1739 nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
1652} 1740}
1653 1741
1654/** 1742/**
@@ -1660,7 +1748,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1660 * @ptrs: 1748 * @ptrs:
1661 * @n: 1749 * @n:
1662 */ 1750 */
1663int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap, 1751int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1664 __u64 key, __u64 ptr, 1752 __u64 key, __u64 ptr,
1665 const __u64 *keys, const __u64 *ptrs, int n) 1753 const __u64 *keys, const __u64 *ptrs, int n)
1666{ 1754{
@@ -1673,7 +1761,7 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
1673 di = &dreq; 1761 di = &dreq;
1674 ni = NULL; 1762 ni = NULL;
1675 } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX( 1763 } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1676 1 << bmap->b_inode->i_blkbits)) { 1764 1 << btree->b_inode->i_blkbits)) {
1677 di = &dreq; 1765 di = &dreq;
1678 ni = &nreq; 1766 ni = &nreq;
1679 } else { 1767 } else {
@@ -1682,17 +1770,17 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
1682 BUG(); 1770 BUG();
1683 } 1771 }
1684 1772
1685 ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh, 1773 ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
1686 &stats); 1774 &stats);
1687 if (ret < 0) 1775 if (ret < 0)
1688 return ret; 1776 return ret;
1689 nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n, 1777 nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1690 di, ni, bh); 1778 di, ni, bh);
1691 nilfs_bmap_add_blocks(bmap, stats.bs_nblocks); 1779 nilfs_bmap_add_blocks(btree, stats.bs_nblocks);
1692 return 0; 1780 return 0;
1693} 1781}
1694 1782
1695static int nilfs_btree_propagate_p(struct nilfs_btree *btree, 1783static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1696 struct nilfs_btree_path *path, 1784 struct nilfs_btree_path *path,
1697 int level, 1785 int level,
1698 struct buffer_head *bh) 1786 struct buffer_head *bh)
@@ -1704,17 +1792,17 @@ static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
1704 return 0; 1792 return 0;
1705} 1793}
1706 1794
1707static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree, 1795static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
1708 struct nilfs_btree_path *path, 1796 struct nilfs_btree_path *path,
1709 int level, struct inode *dat) 1797 int level, struct inode *dat)
1710{ 1798{
1711 struct nilfs_btree_node *parent; 1799 struct nilfs_btree_node *parent;
1712 int ret; 1800 int ncmax, ret;
1713 1801
1714 parent = nilfs_btree_get_node(btree, path, level + 1); 1802 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1715 path[level].bp_oldreq.bpr_ptr = 1803 path[level].bp_oldreq.bpr_ptr =
1716 nilfs_btree_node_get_ptr(btree, parent, 1804 nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
1717 path[level + 1].bp_index); 1805 ncmax);
1718 path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1; 1806 path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1719 ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req, 1807 ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1720 &path[level].bp_newreq.bpr_req); 1808 &path[level].bp_newreq.bpr_req);
@@ -1726,7 +1814,7 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1726 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr; 1814 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1727 path[level].bp_ctxt.bh = path[level].bp_bh; 1815 path[level].bp_ctxt.bh = path[level].bp_bh;
1728 ret = nilfs_btnode_prepare_change_key( 1816 ret = nilfs_btnode_prepare_change_key(
1729 &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, 1817 &NILFS_BMAP_I(btree)->i_btnode_cache,
1730 &path[level].bp_ctxt); 1818 &path[level].bp_ctxt);
1731 if (ret < 0) { 1819 if (ret < 0) {
1732 nilfs_dat_abort_update(dat, 1820 nilfs_dat_abort_update(dat,
@@ -1739,30 +1827,31 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1739 return 0; 1827 return 0;
1740} 1828}
1741 1829
1742static void nilfs_btree_commit_update_v(struct nilfs_btree *btree, 1830static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
1743 struct nilfs_btree_path *path, 1831 struct nilfs_btree_path *path,
1744 int level, struct inode *dat) 1832 int level, struct inode *dat)
1745{ 1833{
1746 struct nilfs_btree_node *parent; 1834 struct nilfs_btree_node *parent;
1835 int ncmax;
1747 1836
1748 nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req, 1837 nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1749 &path[level].bp_newreq.bpr_req, 1838 &path[level].bp_newreq.bpr_req,
1750 btree->bt_bmap.b_ptr_type == NILFS_BMAP_PTR_VS); 1839 btree->b_ptr_type == NILFS_BMAP_PTR_VS);
1751 1840
1752 if (buffer_nilfs_node(path[level].bp_bh)) { 1841 if (buffer_nilfs_node(path[level].bp_bh)) {
1753 nilfs_btnode_commit_change_key( 1842 nilfs_btnode_commit_change_key(
1754 &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, 1843 &NILFS_BMAP_I(btree)->i_btnode_cache,
1755 &path[level].bp_ctxt); 1844 &path[level].bp_ctxt);
1756 path[level].bp_bh = path[level].bp_ctxt.bh; 1845 path[level].bp_bh = path[level].bp_ctxt.bh;
1757 } 1846 }
1758 set_buffer_nilfs_volatile(path[level].bp_bh); 1847 set_buffer_nilfs_volatile(path[level].bp_bh);
1759 1848
1760 parent = nilfs_btree_get_node(btree, path, level + 1); 1849 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1761 nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index, 1850 nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
1762 path[level].bp_newreq.bpr_ptr); 1851 path[level].bp_newreq.bpr_ptr, ncmax);
1763} 1852}
1764 1853
1765static void nilfs_btree_abort_update_v(struct nilfs_btree *btree, 1854static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
1766 struct nilfs_btree_path *path, 1855 struct nilfs_btree_path *path,
1767 int level, struct inode *dat) 1856 int level, struct inode *dat)
1768{ 1857{
@@ -1770,11 +1859,11 @@ static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
1770 &path[level].bp_newreq.bpr_req); 1859 &path[level].bp_newreq.bpr_req);
1771 if (buffer_nilfs_node(path[level].bp_bh)) 1860 if (buffer_nilfs_node(path[level].bp_bh))
1772 nilfs_btnode_abort_change_key( 1861 nilfs_btnode_abort_change_key(
1773 &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, 1862 &NILFS_BMAP_I(btree)->i_btnode_cache,
1774 &path[level].bp_ctxt); 1863 &path[level].bp_ctxt);
1775} 1864}
1776 1865
1777static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree, 1866static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
1778 struct nilfs_btree_path *path, 1867 struct nilfs_btree_path *path,
1779 int minlevel, int *maxlevelp, 1868 int minlevel, int *maxlevelp,
1780 struct inode *dat) 1869 struct inode *dat)
@@ -1809,7 +1898,7 @@ static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1809 return ret; 1898 return ret;
1810} 1899}
1811 1900
1812static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree, 1901static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
1813 struct nilfs_btree_path *path, 1902 struct nilfs_btree_path *path,
1814 int minlevel, int maxlevel, 1903 int minlevel, int maxlevel,
1815 struct buffer_head *bh, 1904 struct buffer_head *bh,
@@ -1824,14 +1913,15 @@ static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
1824 nilfs_btree_commit_update_v(btree, path, level, dat); 1913 nilfs_btree_commit_update_v(btree, path, level, dat);
1825} 1914}
1826 1915
1827static int nilfs_btree_propagate_v(struct nilfs_btree *btree, 1916static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
1828 struct nilfs_btree_path *path, 1917 struct nilfs_btree_path *path,
1829 int level, struct buffer_head *bh) 1918 int level, struct buffer_head *bh)
1830{ 1919{
1831 int maxlevel = 0, ret; 1920 int maxlevel = 0, ret;
1832 struct nilfs_btree_node *parent; 1921 struct nilfs_btree_node *parent;
1833 struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap); 1922 struct inode *dat = nilfs_bmap_get_dat(btree);
1834 __u64 ptr; 1923 __u64 ptr;
1924 int ncmax;
1835 1925
1836 get_bh(bh); 1926 get_bh(bh);
1837 path[level].bp_bh = bh; 1927 path[level].bp_bh = bh;
@@ -1841,9 +1931,10 @@ static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
1841 goto out; 1931 goto out;
1842 1932
1843 if (buffer_nilfs_volatile(path[level].bp_bh)) { 1933 if (buffer_nilfs_volatile(path[level].bp_bh)) {
1844 parent = nilfs_btree_get_node(btree, path, level + 1); 1934 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1845 ptr = nilfs_btree_node_get_ptr(btree, parent, 1935 ptr = nilfs_btree_node_get_ptr(parent,
1846 path[level + 1].bp_index); 1936 path[level + 1].bp_index,
1937 ncmax);
1847 ret = nilfs_dat_mark_dirty(dat, ptr); 1938 ret = nilfs_dat_mark_dirty(dat, ptr);
1848 if (ret < 0) 1939 if (ret < 0)
1849 goto out; 1940 goto out;
@@ -1857,10 +1948,9 @@ static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
1857 return ret; 1948 return ret;
1858} 1949}
1859 1950
1860static int nilfs_btree_propagate(const struct nilfs_bmap *bmap, 1951static int nilfs_btree_propagate(struct nilfs_bmap *btree,
1861 struct buffer_head *bh) 1952 struct buffer_head *bh)
1862{ 1953{
1863 struct nilfs_btree *btree;
1864 struct nilfs_btree_path *path; 1954 struct nilfs_btree_path *path;
1865 struct nilfs_btree_node *node; 1955 struct nilfs_btree_node *node;
1866 __u64 key; 1956 __u64 key;
@@ -1868,7 +1958,6 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1868 1958
1869 WARN_ON(!buffer_dirty(bh)); 1959 WARN_ON(!buffer_dirty(bh));
1870 1960
1871 btree = (struct nilfs_btree *)bmap;
1872 path = nilfs_btree_alloc_path(); 1961 path = nilfs_btree_alloc_path();
1873 if (path == NULL) 1962 if (path == NULL)
1874 return -ENOMEM; 1963 return -ENOMEM;
@@ -1878,11 +1967,11 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1878 key = nilfs_btree_node_get_key(node, 0); 1967 key = nilfs_btree_node_get_key(node, 0);
1879 level = nilfs_btree_node_get_level(node); 1968 level = nilfs_btree_node_get_level(node);
1880 } else { 1969 } else {
1881 key = nilfs_bmap_data_get_key(bmap, bh); 1970 key = nilfs_bmap_data_get_key(btree, bh);
1882 level = NILFS_BTREE_LEVEL_DATA; 1971 level = NILFS_BTREE_LEVEL_DATA;
1883 } 1972 }
1884 1973
1885 ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1); 1974 ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
1886 if (ret < 0) { 1975 if (ret < 0) {
1887 if (unlikely(ret == -ENOENT)) 1976 if (unlikely(ret == -ENOENT))
1888 printk(KERN_CRIT "%s: key = %llu, level == %d\n", 1977 printk(KERN_CRIT "%s: key = %llu, level == %d\n",
@@ -1890,7 +1979,7 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1890 goto out; 1979 goto out;
1891 } 1980 }
1892 1981
1893 ret = NILFS_BMAP_USE_VBN(bmap) ? 1982 ret = NILFS_BMAP_USE_VBN(btree) ?
1894 nilfs_btree_propagate_v(btree, path, level, bh) : 1983 nilfs_btree_propagate_v(btree, path, level, bh) :
1895 nilfs_btree_propagate_p(btree, path, level, bh); 1984 nilfs_btree_propagate_p(btree, path, level, bh);
1896 1985
@@ -1900,13 +1989,13 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1900 return ret; 1989 return ret;
1901} 1990}
1902 1991
1903static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap, 1992static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
1904 struct buffer_head *bh) 1993 struct buffer_head *bh)
1905{ 1994{
1906 return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), bh->b_blocknr); 1995 return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
1907} 1996}
1908 1997
1909static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree, 1998static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
1910 struct list_head *lists, 1999 struct list_head *lists,
1911 struct buffer_head *bh) 2000 struct buffer_head *bh)
1912{ 2001{
@@ -1920,6 +2009,18 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
1920 node = (struct nilfs_btree_node *)bh->b_data; 2009 node = (struct nilfs_btree_node *)bh->b_data;
1921 key = nilfs_btree_node_get_key(node, 0); 2010 key = nilfs_btree_node_get_key(node, 0);
1922 level = nilfs_btree_node_get_level(node); 2011 level = nilfs_btree_node_get_level(node);
2012 if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
2013 level >= NILFS_BTREE_LEVEL_MAX) {
2014 dump_stack();
2015 printk(KERN_WARNING
2016 "%s: invalid btree level: %d (key=%llu, ino=%lu, "
2017 "blocknr=%llu)\n",
2018 __func__, level, (unsigned long long)key,
2019 NILFS_BMAP_I(btree)->vfs_inode.i_ino,
2020 (unsigned long long)bh->b_blocknr);
2021 return;
2022 }
2023
1923 list_for_each(head, &lists[level]) { 2024 list_for_each(head, &lists[level]) {
1924 cbh = list_entry(head, struct buffer_head, b_assoc_buffers); 2025 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
1925 cnode = (struct nilfs_btree_node *)cbh->b_data; 2026 cnode = (struct nilfs_btree_node *)cbh->b_data;
@@ -1930,11 +2031,10 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
1930 list_add_tail(&bh->b_assoc_buffers, head); 2031 list_add_tail(&bh->b_assoc_buffers, head);
1931} 2032}
1932 2033
1933static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap, 2034static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
1934 struct list_head *listp) 2035 struct list_head *listp)
1935{ 2036{
1936 struct nilfs_btree *btree = (struct nilfs_btree *)bmap; 2037 struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache;
1937 struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache;
1938 struct list_head lists[NILFS_BTREE_LEVEL_MAX]; 2038 struct list_head lists[NILFS_BTREE_LEVEL_MAX];
1939 struct pagevec pvec; 2039 struct pagevec pvec;
1940 struct buffer_head *bh, *head; 2040 struct buffer_head *bh, *head;
@@ -1968,7 +2068,7 @@ static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
1968 list_splice_tail(&lists[level], listp); 2068 list_splice_tail(&lists[level], listp);
1969} 2069}
1970 2070
1971static int nilfs_btree_assign_p(struct nilfs_btree *btree, 2071static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
1972 struct nilfs_btree_path *path, 2072 struct nilfs_btree_path *path,
1973 int level, 2073 int level,
1974 struct buffer_head **bh, 2074 struct buffer_head **bh,
@@ -1978,38 +2078,38 @@ static int nilfs_btree_assign_p(struct nilfs_btree *btree,
1978 struct nilfs_btree_node *parent; 2078 struct nilfs_btree_node *parent;
1979 __u64 key; 2079 __u64 key;
1980 __u64 ptr; 2080 __u64 ptr;
1981 int ret; 2081 int ncmax, ret;
1982 2082
1983 parent = nilfs_btree_get_node(btree, path, level + 1); 2083 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1984 ptr = nilfs_btree_node_get_ptr(btree, parent, 2084 ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
1985 path[level + 1].bp_index); 2085 ncmax);
1986 if (buffer_nilfs_node(*bh)) { 2086 if (buffer_nilfs_node(*bh)) {
1987 path[level].bp_ctxt.oldkey = ptr; 2087 path[level].bp_ctxt.oldkey = ptr;
1988 path[level].bp_ctxt.newkey = blocknr; 2088 path[level].bp_ctxt.newkey = blocknr;
1989 path[level].bp_ctxt.bh = *bh; 2089 path[level].bp_ctxt.bh = *bh;
1990 ret = nilfs_btnode_prepare_change_key( 2090 ret = nilfs_btnode_prepare_change_key(
1991 &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, 2091 &NILFS_BMAP_I(btree)->i_btnode_cache,
1992 &path[level].bp_ctxt); 2092 &path[level].bp_ctxt);
1993 if (ret < 0) 2093 if (ret < 0)
1994 return ret; 2094 return ret;
1995 nilfs_btnode_commit_change_key( 2095 nilfs_btnode_commit_change_key(
1996 &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, 2096 &NILFS_BMAP_I(btree)->i_btnode_cache,
1997 &path[level].bp_ctxt); 2097 &path[level].bp_ctxt);
1998 *bh = path[level].bp_ctxt.bh; 2098 *bh = path[level].bp_ctxt.bh;
1999 } 2099 }
2000 2100
2001 nilfs_btree_node_set_ptr(btree, parent, 2101 nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
2002 path[level + 1].bp_index, blocknr); 2102 ncmax);
2003 2103
2004 key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); 2104 key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2005 /* on-disk format */ 2105 /* on-disk format */
2006 binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key); 2106 binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2007 binfo->bi_dat.bi_level = level; 2107 binfo->bi_dat.bi_level = level;
2008 2108
2009 return 0; 2109 return 0;
2010} 2110}
2011 2111
2012static int nilfs_btree_assign_v(struct nilfs_btree *btree, 2112static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
2013 struct nilfs_btree_path *path, 2113 struct nilfs_btree_path *path,
2014 int level, 2114 int level,
2015 struct buffer_head **bh, 2115 struct buffer_head **bh,
@@ -2017,15 +2117,15 @@ static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2017 union nilfs_binfo *binfo) 2117 union nilfs_binfo *binfo)
2018{ 2118{
2019 struct nilfs_btree_node *parent; 2119 struct nilfs_btree_node *parent;
2020 struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap); 2120 struct inode *dat = nilfs_bmap_get_dat(btree);
2021 __u64 key; 2121 __u64 key;
2022 __u64 ptr; 2122 __u64 ptr;
2023 union nilfs_bmap_ptr_req req; 2123 union nilfs_bmap_ptr_req req;
2024 int ret; 2124 int ncmax, ret;
2025 2125
2026 parent = nilfs_btree_get_node(btree, path, level + 1); 2126 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2027 ptr = nilfs_btree_node_get_ptr(btree, parent, 2127 ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2028 path[level + 1].bp_index); 2128 ncmax);
2029 req.bpr_ptr = ptr; 2129 req.bpr_ptr = ptr;
2030 ret = nilfs_dat_prepare_start(dat, &req.bpr_req); 2130 ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2031 if (ret < 0) 2131 if (ret < 0)
@@ -2034,24 +2134,22 @@ static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2034 2134
2035 key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); 2135 key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2036 /* on-disk format */ 2136 /* on-disk format */
2037 binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr); 2137 binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2038 binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key); 2138 binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2039 2139
2040 return 0; 2140 return 0;
2041} 2141}
2042 2142
2043static int nilfs_btree_assign(struct nilfs_bmap *bmap, 2143static int nilfs_btree_assign(struct nilfs_bmap *btree,
2044 struct buffer_head **bh, 2144 struct buffer_head **bh,
2045 sector_t blocknr, 2145 sector_t blocknr,
2046 union nilfs_binfo *binfo) 2146 union nilfs_binfo *binfo)
2047{ 2147{
2048 struct nilfs_btree *btree;
2049 struct nilfs_btree_path *path; 2148 struct nilfs_btree_path *path;
2050 struct nilfs_btree_node *node; 2149 struct nilfs_btree_node *node;
2051 __u64 key; 2150 __u64 key;
2052 int level, ret; 2151 int level, ret;
2053 2152
2054 btree = (struct nilfs_btree *)bmap;
2055 path = nilfs_btree_alloc_path(); 2153 path = nilfs_btree_alloc_path();
2056 if (path == NULL) 2154 if (path == NULL)
2057 return -ENOMEM; 2155 return -ENOMEM;
@@ -2061,17 +2159,17 @@ static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2061 key = nilfs_btree_node_get_key(node, 0); 2159 key = nilfs_btree_node_get_key(node, 0);
2062 level = nilfs_btree_node_get_level(node); 2160 level = nilfs_btree_node_get_level(node);
2063 } else { 2161 } else {
2064 key = nilfs_bmap_data_get_key(bmap, *bh); 2162 key = nilfs_bmap_data_get_key(btree, *bh);
2065 level = NILFS_BTREE_LEVEL_DATA; 2163 level = NILFS_BTREE_LEVEL_DATA;
2066 } 2164 }
2067 2165
2068 ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1); 2166 ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2069 if (ret < 0) { 2167 if (ret < 0) {
2070 WARN_ON(ret == -ENOENT); 2168 WARN_ON(ret == -ENOENT);
2071 goto out; 2169 goto out;
2072 } 2170 }
2073 2171
2074 ret = NILFS_BMAP_USE_VBN(bmap) ? 2172 ret = NILFS_BMAP_USE_VBN(btree) ?
2075 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) : 2173 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2076 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo); 2174 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2077 2175
@@ -2081,7 +2179,7 @@ static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2081 return ret; 2179 return ret;
2082} 2180}
2083 2181
2084static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap, 2182static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
2085 struct buffer_head **bh, 2183 struct buffer_head **bh,
2086 sector_t blocknr, 2184 sector_t blocknr,
2087 union nilfs_binfo *binfo) 2185 union nilfs_binfo *binfo)
@@ -2090,7 +2188,7 @@ static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
2090 __u64 key; 2188 __u64 key;
2091 int ret; 2189 int ret;
2092 2190
2093 ret = nilfs_dat_move(nilfs_bmap_get_dat(bmap), (*bh)->b_blocknr, 2191 ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
2094 blocknr); 2192 blocknr);
2095 if (ret < 0) 2193 if (ret < 0)
2096 return ret; 2194 return ret;
@@ -2099,29 +2197,27 @@ static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
2099 node = (struct nilfs_btree_node *)(*bh)->b_data; 2197 node = (struct nilfs_btree_node *)(*bh)->b_data;
2100 key = nilfs_btree_node_get_key(node, 0); 2198 key = nilfs_btree_node_get_key(node, 0);
2101 } else 2199 } else
2102 key = nilfs_bmap_data_get_key(bmap, *bh); 2200 key = nilfs_bmap_data_get_key(btree, *bh);
2103 2201
2104 /* on-disk format */ 2202 /* on-disk format */
2105 binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr); 2203 binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2106 binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key); 2204 binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2107 2205
2108 return 0; 2206 return 0;
2109} 2207}
2110 2208
2111static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level) 2209static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2112{ 2210{
2113 struct buffer_head *bh; 2211 struct buffer_head *bh;
2114 struct nilfs_btree *btree;
2115 struct nilfs_btree_path *path; 2212 struct nilfs_btree_path *path;
2116 __u64 ptr; 2213 __u64 ptr;
2117 int ret; 2214 int ret;
2118 2215
2119 btree = (struct nilfs_btree *)bmap;
2120 path = nilfs_btree_alloc_path(); 2216 path = nilfs_btree_alloc_path();
2121 if (path == NULL) 2217 if (path == NULL)
2122 return -ENOMEM; 2218 return -ENOMEM;
2123 2219
2124 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1); 2220 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
2125 if (ret < 0) { 2221 if (ret < 0) {
2126 WARN_ON(ret == -ENOENT); 2222 WARN_ON(ret == -ENOENT);
2127 goto out; 2223 goto out;
@@ -2135,8 +2231,8 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2135 if (!buffer_dirty(bh)) 2231 if (!buffer_dirty(bh))
2136 nilfs_btnode_mark_dirty(bh); 2232 nilfs_btnode_mark_dirty(bh);
2137 brelse(bh); 2233 brelse(bh);
2138 if (!nilfs_bmap_dirty(&btree->bt_bmap)) 2234 if (!nilfs_bmap_dirty(btree))
2139 nilfs_bmap_set_dirty(&btree->bt_bmap); 2235 nilfs_bmap_set_dirty(btree);
2140 2236
2141 out: 2237 out:
2142 nilfs_btree_free_path(path); 2238 nilfs_btree_free_path(path);
@@ -2186,10 +2282,14 @@ static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2186int nilfs_btree_init(struct nilfs_bmap *bmap) 2282int nilfs_btree_init(struct nilfs_bmap *bmap)
2187{ 2283{
2188 bmap->b_ops = &nilfs_btree_ops; 2284 bmap->b_ops = &nilfs_btree_ops;
2285 bmap->b_nchildren_per_block =
2286 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2189 return 0; 2287 return 0;
2190} 2288}
2191 2289
2192void nilfs_btree_init_gc(struct nilfs_bmap *bmap) 2290void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2193{ 2291{
2194 bmap->b_ops = &nilfs_btree_ops_gc; 2292 bmap->b_ops = &nilfs_btree_ops_gc;
2293 bmap->b_nchildren_per_block =
2294 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2195} 2295}
diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h
index 43c8c5b541fd..22c02e35b6ef 100644
--- a/fs/nilfs2/btree.h
+++ b/fs/nilfs2/btree.h
@@ -31,14 +31,6 @@
31#include "bmap.h" 31#include "bmap.h"
32 32
33/** 33/**
34 * struct nilfs_btree - B-tree structure
35 * @bt_bmap: bmap base structure
36 */
37struct nilfs_btree {
38 struct nilfs_bmap bt_bmap;
39};
40
41/**
42 * struct nilfs_btree_path - A path on which B-tree operations are executed 34 * struct nilfs_btree_path - A path on which B-tree operations are executed
43 * @bp_bh: buffer head of node block 35 * @bp_bh: buffer head of node block
44 * @bp_sib_bh: buffer head of sibling node block 36 * @bp_sib_bh: buffer head of sibling node block
@@ -54,7 +46,7 @@ struct nilfs_btree_path {
54 union nilfs_bmap_ptr_req bp_oldreq; 46 union nilfs_bmap_ptr_req bp_oldreq;
55 union nilfs_bmap_ptr_req bp_newreq; 47 union nilfs_bmap_ptr_req bp_newreq;
56 struct nilfs_btnode_chkey_ctxt bp_ctxt; 48 struct nilfs_btnode_chkey_ctxt bp_ctxt;
57 void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *, 49 void (*bp_op)(struct nilfs_bmap *, struct nilfs_btree_path *,
58 int, __u64 *, __u64 *); 50 int, __u64 *, __u64 *);
59}; 51};
60 52
@@ -80,4 +72,6 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *, __u64, __u64,
80 const __u64 *, const __u64 *, int); 72 const __u64 *, const __u64 *, int);
81void nilfs_btree_init_gc(struct nilfs_bmap *); 73void nilfs_btree_init_gc(struct nilfs_bmap *);
82 74
75int nilfs_btree_broken_node_block(struct buffer_head *bh);
76
83#endif /* _NILFS_BTREE_H */ 77#endif /* _NILFS_BTREE_H */
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index 85c89dfc71f0..cb003c8ee1f6 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -80,23 +80,10 @@ static unsigned nilfs_last_byte(struct inode *inode, unsigned long page_nr)
80 return last_byte; 80 return last_byte;
81} 81}
82 82
83static int nilfs_prepare_chunk_uninterruptible(struct page *page, 83static int nilfs_prepare_chunk(struct page *page, unsigned from, unsigned to)
84 struct address_space *mapping,
85 unsigned from, unsigned to)
86{ 84{
87 loff_t pos = page_offset(page) + from; 85 loff_t pos = page_offset(page) + from;
88 return block_write_begin(NULL, mapping, pos, to - from, 86 return __block_write_begin(page, pos, to - from, nilfs_get_block);
89 AOP_FLAG_UNINTERRUPTIBLE, &page,
90 NULL, nilfs_get_block);
91}
92
93static int nilfs_prepare_chunk(struct page *page,
94 struct address_space *mapping,
95 unsigned from, unsigned to)
96{
97 loff_t pos = page_offset(page) + from;
98 return block_write_begin(NULL, mapping, pos, to - from, 0, &page,
99 NULL, nilfs_get_block);
100} 87}
101 88
102static void nilfs_commit_chunk(struct page *page, 89static void nilfs_commit_chunk(struct page *page,
@@ -141,7 +128,7 @@ static void nilfs_check_page(struct page *page)
141 } 128 }
142 for (offs = 0; offs <= limit - NILFS_DIR_REC_LEN(1); offs += rec_len) { 129 for (offs = 0; offs <= limit - NILFS_DIR_REC_LEN(1); offs += rec_len) {
143 p = (struct nilfs_dir_entry *)(kaddr + offs); 130 p = (struct nilfs_dir_entry *)(kaddr + offs);
144 rec_len = le16_to_cpu(p->rec_len); 131 rec_len = nilfs_rec_len_from_disk(p->rec_len);
145 132
146 if (rec_len < NILFS_DIR_REC_LEN(1)) 133 if (rec_len < NILFS_DIR_REC_LEN(1))
147 goto Eshort; 134 goto Eshort;
@@ -199,13 +186,10 @@ fail:
199static struct page *nilfs_get_page(struct inode *dir, unsigned long n) 186static struct page *nilfs_get_page(struct inode *dir, unsigned long n)
200{ 187{
201 struct address_space *mapping = dir->i_mapping; 188 struct address_space *mapping = dir->i_mapping;
202 struct page *page = read_cache_page(mapping, n, 189 struct page *page = read_mapping_page(mapping, n, NULL);
203 (filler_t *)mapping->a_ops->readpage, NULL); 190
204 if (!IS_ERR(page)) { 191 if (!IS_ERR(page)) {
205 wait_on_page_locked(page);
206 kmap(page); 192 kmap(page);
207 if (!PageUptodate(page))
208 goto fail;
209 if (!PageChecked(page)) 193 if (!PageChecked(page))
210 nilfs_check_page(page); 194 nilfs_check_page(page);
211 if (PageError(page)) 195 if (PageError(page))
@@ -238,7 +222,8 @@ nilfs_match(int len, const unsigned char *name, struct nilfs_dir_entry *de)
238 */ 222 */
239static struct nilfs_dir_entry *nilfs_next_entry(struct nilfs_dir_entry *p) 223static struct nilfs_dir_entry *nilfs_next_entry(struct nilfs_dir_entry *p)
240{ 224{
241 return (struct nilfs_dir_entry *)((char *)p + le16_to_cpu(p->rec_len)); 225 return (struct nilfs_dir_entry *)((char *)p +
226 nilfs_rec_len_from_disk(p->rec_len));
242} 227}
243 228
244static unsigned char 229static unsigned char
@@ -329,7 +314,7 @@ static int nilfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
329 goto success; 314 goto success;
330 } 315 }
331 } 316 }
332 filp->f_pos += le16_to_cpu(de->rec_len); 317 filp->f_pos += nilfs_rec_len_from_disk(de->rec_len);
333 } 318 }
334 nilfs_put_page(page); 319 nilfs_put_page(page);
335 } 320 }
@@ -444,12 +429,12 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
444 struct page *page, struct inode *inode) 429 struct page *page, struct inode *inode)
445{ 430{
446 unsigned from = (char *) de - (char *) page_address(page); 431 unsigned from = (char *) de - (char *) page_address(page);
447 unsigned to = from + le16_to_cpu(de->rec_len); 432 unsigned to = from + nilfs_rec_len_from_disk(de->rec_len);
448 struct address_space *mapping = page->mapping; 433 struct address_space *mapping = page->mapping;
449 int err; 434 int err;
450 435
451 lock_page(page); 436 lock_page(page);
452 err = nilfs_prepare_chunk_uninterruptible(page, mapping, from, to); 437 err = nilfs_prepare_chunk(page, from, to);
453 BUG_ON(err); 438 BUG_ON(err);
454 de->inode = cpu_to_le64(inode->i_ino); 439 de->inode = cpu_to_le64(inode->i_ino);
455 nilfs_set_de_type(de, inode); 440 nilfs_set_de_type(de, inode);
@@ -500,7 +485,7 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode)
500 /* We hit i_size */ 485 /* We hit i_size */
501 name_len = 0; 486 name_len = 0;
502 rec_len = chunk_size; 487 rec_len = chunk_size;
503 de->rec_len = cpu_to_le16(chunk_size); 488 de->rec_len = nilfs_rec_len_to_disk(chunk_size);
504 de->inode = 0; 489 de->inode = 0;
505 goto got_it; 490 goto got_it;
506 } 491 }
@@ -514,7 +499,7 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode)
514 if (nilfs_match(namelen, name, de)) 499 if (nilfs_match(namelen, name, de))
515 goto out_unlock; 500 goto out_unlock;
516 name_len = NILFS_DIR_REC_LEN(de->name_len); 501 name_len = NILFS_DIR_REC_LEN(de->name_len);
517 rec_len = le16_to_cpu(de->rec_len); 502 rec_len = nilfs_rec_len_from_disk(de->rec_len);
518 if (!de->inode && rec_len >= reclen) 503 if (!de->inode && rec_len >= reclen)
519 goto got_it; 504 goto got_it;
520 if (rec_len >= name_len + reclen) 505 if (rec_len >= name_len + reclen)
@@ -530,15 +515,15 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode)
530got_it: 515got_it:
531 from = (char *)de - (char *)page_address(page); 516 from = (char *)de - (char *)page_address(page);
532 to = from + rec_len; 517 to = from + rec_len;
533 err = nilfs_prepare_chunk(page, page->mapping, from, to); 518 err = nilfs_prepare_chunk(page, from, to);
534 if (err) 519 if (err)
535 goto out_unlock; 520 goto out_unlock;
536 if (de->inode) { 521 if (de->inode) {
537 struct nilfs_dir_entry *de1; 522 struct nilfs_dir_entry *de1;
538 523
539 de1 = (struct nilfs_dir_entry *)((char *)de + name_len); 524 de1 = (struct nilfs_dir_entry *)((char *)de + name_len);
540 de1->rec_len = cpu_to_le16(rec_len - name_len); 525 de1->rec_len = nilfs_rec_len_to_disk(rec_len - name_len);
541 de->rec_len = cpu_to_le16(name_len); 526 de->rec_len = nilfs_rec_len_to_disk(name_len);
542 de = de1; 527 de = de1;
543 } 528 }
544 de->name_len = namelen; 529 de->name_len = namelen;
@@ -569,7 +554,8 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page)
569 struct inode *inode = mapping->host; 554 struct inode *inode = mapping->host;
570 char *kaddr = page_address(page); 555 char *kaddr = page_address(page);
571 unsigned from = ((char *)dir - kaddr) & ~(nilfs_chunk_size(inode) - 1); 556 unsigned from = ((char *)dir - kaddr) & ~(nilfs_chunk_size(inode) - 1);
572 unsigned to = ((char *)dir - kaddr) + le16_to_cpu(dir->rec_len); 557 unsigned to = ((char *)dir - kaddr) +
558 nilfs_rec_len_from_disk(dir->rec_len);
573 struct nilfs_dir_entry *pde = NULL; 559 struct nilfs_dir_entry *pde = NULL;
574 struct nilfs_dir_entry *de = (struct nilfs_dir_entry *)(kaddr + from); 560 struct nilfs_dir_entry *de = (struct nilfs_dir_entry *)(kaddr + from);
575 int err; 561 int err;
@@ -587,10 +573,10 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page)
587 if (pde) 573 if (pde)
588 from = (char *)pde - (char *)page_address(page); 574 from = (char *)pde - (char *)page_address(page);
589 lock_page(page); 575 lock_page(page);
590 err = nilfs_prepare_chunk(page, mapping, from, to); 576 err = nilfs_prepare_chunk(page, from, to);
591 BUG_ON(err); 577 BUG_ON(err);
592 if (pde) 578 if (pde)
593 pde->rec_len = cpu_to_le16(to - from); 579 pde->rec_len = nilfs_rec_len_to_disk(to - from);
594 dir->inode = 0; 580 dir->inode = 0;
595 nilfs_commit_chunk(page, mapping, from, to); 581 nilfs_commit_chunk(page, mapping, from, to);
596 inode->i_ctime = inode->i_mtime = CURRENT_TIME; 582 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
@@ -615,7 +601,7 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent)
615 if (!page) 601 if (!page)
616 return -ENOMEM; 602 return -ENOMEM;
617 603
618 err = nilfs_prepare_chunk(page, mapping, 0, chunk_size); 604 err = nilfs_prepare_chunk(page, 0, chunk_size);
619 if (unlikely(err)) { 605 if (unlikely(err)) {
620 unlock_page(page); 606 unlock_page(page);
621 goto fail; 607 goto fail;
@@ -624,14 +610,14 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent)
624 memset(kaddr, 0, chunk_size); 610 memset(kaddr, 0, chunk_size);
625 de = (struct nilfs_dir_entry *)kaddr; 611 de = (struct nilfs_dir_entry *)kaddr;
626 de->name_len = 1; 612 de->name_len = 1;
627 de->rec_len = cpu_to_le16(NILFS_DIR_REC_LEN(1)); 613 de->rec_len = nilfs_rec_len_to_disk(NILFS_DIR_REC_LEN(1));
628 memcpy(de->name, ".\0\0", 4); 614 memcpy(de->name, ".\0\0", 4);
629 de->inode = cpu_to_le64(inode->i_ino); 615 de->inode = cpu_to_le64(inode->i_ino);
630 nilfs_set_de_type(de, inode); 616 nilfs_set_de_type(de, inode);
631 617
632 de = (struct nilfs_dir_entry *)(kaddr + NILFS_DIR_REC_LEN(1)); 618 de = (struct nilfs_dir_entry *)(kaddr + NILFS_DIR_REC_LEN(1));
633 de->name_len = 2; 619 de->name_len = 2;
634 de->rec_len = cpu_to_le16(chunk_size - NILFS_DIR_REC_LEN(1)); 620 de->rec_len = nilfs_rec_len_to_disk(chunk_size - NILFS_DIR_REC_LEN(1));
635 de->inode = cpu_to_le64(parent->i_ino); 621 de->inode = cpu_to_le64(parent->i_ino);
636 memcpy(de->name, "..\0", 4); 622 memcpy(de->name, "..\0", 4);
637 nilfs_set_de_type(de, inode); 623 nilfs_set_de_type(de, inode);
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
index 236753df5cdf..324d80c57518 100644
--- a/fs/nilfs2/direct.c
+++ b/fs/nilfs2/direct.c
@@ -27,47 +27,43 @@
27#include "alloc.h" 27#include "alloc.h"
28#include "dat.h" 28#include "dat.h"
29 29
30static inline __le64 *nilfs_direct_dptrs(const struct nilfs_direct *direct) 30static inline __le64 *nilfs_direct_dptrs(const struct nilfs_bmap *direct)
31{ 31{
32 return (__le64 *) 32 return (__le64 *)
33 ((struct nilfs_direct_node *)direct->d_bmap.b_u.u_data + 1); 33 ((struct nilfs_direct_node *)direct->b_u.u_data + 1);
34} 34}
35 35
36static inline __u64 36static inline __u64
37nilfs_direct_get_ptr(const struct nilfs_direct *direct, __u64 key) 37nilfs_direct_get_ptr(const struct nilfs_bmap *direct, __u64 key)
38{ 38{
39 return nilfs_bmap_dptr_to_ptr(*(nilfs_direct_dptrs(direct) + key)); 39 return le64_to_cpu(*(nilfs_direct_dptrs(direct) + key));
40} 40}
41 41
42static inline void nilfs_direct_set_ptr(struct nilfs_direct *direct, 42static inline void nilfs_direct_set_ptr(struct nilfs_bmap *direct,
43 __u64 key, __u64 ptr) 43 __u64 key, __u64 ptr)
44{ 44{
45 *(nilfs_direct_dptrs(direct) + key) = nilfs_bmap_ptr_to_dptr(ptr); 45 *(nilfs_direct_dptrs(direct) + key) = cpu_to_le64(ptr);
46} 46}
47 47
48static int nilfs_direct_lookup(const struct nilfs_bmap *bmap, 48static int nilfs_direct_lookup(const struct nilfs_bmap *direct,
49 __u64 key, int level, __u64 *ptrp) 49 __u64 key, int level, __u64 *ptrp)
50{ 50{
51 struct nilfs_direct *direct;
52 __u64 ptr; 51 __u64 ptr;
53 52
54 direct = (struct nilfs_direct *)bmap; /* XXX: use macro for level 1 */
55 if (key > NILFS_DIRECT_KEY_MAX || level != 1) 53 if (key > NILFS_DIRECT_KEY_MAX || level != 1)
56 return -ENOENT; 54 return -ENOENT;
57 ptr = nilfs_direct_get_ptr(direct, key); 55 ptr = nilfs_direct_get_ptr(direct, key);
58 if (ptr == NILFS_BMAP_INVALID_PTR) 56 if (ptr == NILFS_BMAP_INVALID_PTR)
59 return -ENOENT; 57 return -ENOENT;
60 58
61 if (ptrp != NULL) 59 *ptrp = ptr;
62 *ptrp = ptr;
63 return 0; 60 return 0;
64} 61}
65 62
66static int nilfs_direct_lookup_contig(const struct nilfs_bmap *bmap, 63static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct,
67 __u64 key, __u64 *ptrp, 64 __u64 key, __u64 *ptrp,
68 unsigned maxblocks) 65 unsigned maxblocks)
69{ 66{
70 struct nilfs_direct *direct = (struct nilfs_direct *)bmap;
71 struct inode *dat = NULL; 67 struct inode *dat = NULL;
72 __u64 ptr, ptr2; 68 __u64 ptr, ptr2;
73 sector_t blocknr; 69 sector_t blocknr;
@@ -79,8 +75,8 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *bmap,
79 if (ptr == NILFS_BMAP_INVALID_PTR) 75 if (ptr == NILFS_BMAP_INVALID_PTR)
80 return -ENOENT; 76 return -ENOENT;
81 77
82 if (NILFS_BMAP_USE_VBN(bmap)) { 78 if (NILFS_BMAP_USE_VBN(direct)) {
83 dat = nilfs_bmap_get_dat(bmap); 79 dat = nilfs_bmap_get_dat(direct);
84 ret = nilfs_dat_translate(dat, ptr, &blocknr); 80 ret = nilfs_dat_translate(dat, ptr, &blocknr);
85 if (ret < 0) 81 if (ret < 0)
86 return ret; 82 return ret;
@@ -106,29 +102,21 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *bmap,
106} 102}
107 103
108static __u64 104static __u64
109nilfs_direct_find_target_v(const struct nilfs_direct *direct, __u64 key) 105nilfs_direct_find_target_v(const struct nilfs_bmap *direct, __u64 key)
110{ 106{
111 __u64 ptr; 107 __u64 ptr;
112 108
113 ptr = nilfs_bmap_find_target_seq(&direct->d_bmap, key); 109 ptr = nilfs_bmap_find_target_seq(direct, key);
114 if (ptr != NILFS_BMAP_INVALID_PTR) 110 if (ptr != NILFS_BMAP_INVALID_PTR)
115 /* sequential access */ 111 /* sequential access */
116 return ptr; 112 return ptr;
117 else 113 else
118 /* block group */ 114 /* block group */
119 return nilfs_bmap_find_target_in_group(&direct->d_bmap); 115 return nilfs_bmap_find_target_in_group(direct);
120}
121
122static void nilfs_direct_set_target_v(struct nilfs_direct *direct,
123 __u64 key, __u64 ptr)
124{
125 direct->d_bmap.b_last_allocated_key = key;
126 direct->d_bmap.b_last_allocated_ptr = ptr;
127} 116}
128 117
129static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) 118static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
130{ 119{
131 struct nilfs_direct *direct = (struct nilfs_direct *)bmap;
132 union nilfs_bmap_ptr_req req; 120 union nilfs_bmap_ptr_req req;
133 struct inode *dat = NULL; 121 struct inode *dat = NULL;
134 struct buffer_head *bh; 122 struct buffer_head *bh;
@@ -136,11 +124,11 @@ static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
136 124
137 if (key > NILFS_DIRECT_KEY_MAX) 125 if (key > NILFS_DIRECT_KEY_MAX)
138 return -ENOENT; 126 return -ENOENT;
139 if (nilfs_direct_get_ptr(direct, key) != NILFS_BMAP_INVALID_PTR) 127 if (nilfs_direct_get_ptr(bmap, key) != NILFS_BMAP_INVALID_PTR)
140 return -EEXIST; 128 return -EEXIST;
141 129
142 if (NILFS_BMAP_USE_VBN(bmap)) { 130 if (NILFS_BMAP_USE_VBN(bmap)) {
143 req.bpr_ptr = nilfs_direct_find_target_v(direct, key); 131 req.bpr_ptr = nilfs_direct_find_target_v(bmap, key);
144 dat = nilfs_bmap_get_dat(bmap); 132 dat = nilfs_bmap_get_dat(bmap);
145 } 133 }
146 ret = nilfs_bmap_prepare_alloc_ptr(bmap, &req, dat); 134 ret = nilfs_bmap_prepare_alloc_ptr(bmap, &req, dat);
@@ -150,13 +138,13 @@ static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
150 set_buffer_nilfs_volatile(bh); 138 set_buffer_nilfs_volatile(bh);
151 139
152 nilfs_bmap_commit_alloc_ptr(bmap, &req, dat); 140 nilfs_bmap_commit_alloc_ptr(bmap, &req, dat);
153 nilfs_direct_set_ptr(direct, key, req.bpr_ptr); 141 nilfs_direct_set_ptr(bmap, key, req.bpr_ptr);
154 142
155 if (!nilfs_bmap_dirty(bmap)) 143 if (!nilfs_bmap_dirty(bmap))
156 nilfs_bmap_set_dirty(bmap); 144 nilfs_bmap_set_dirty(bmap);
157 145
158 if (NILFS_BMAP_USE_VBN(bmap)) 146 if (NILFS_BMAP_USE_VBN(bmap))
159 nilfs_direct_set_target_v(direct, key, req.bpr_ptr); 147 nilfs_bmap_set_target_v(bmap, key, req.bpr_ptr);
160 148
161 nilfs_bmap_add_blocks(bmap, 1); 149 nilfs_bmap_add_blocks(bmap, 1);
162 } 150 }
@@ -165,33 +153,30 @@ static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
165 153
166static int nilfs_direct_delete(struct nilfs_bmap *bmap, __u64 key) 154static int nilfs_direct_delete(struct nilfs_bmap *bmap, __u64 key)
167{ 155{
168 struct nilfs_direct *direct = (struct nilfs_direct *)bmap;
169 union nilfs_bmap_ptr_req req; 156 union nilfs_bmap_ptr_req req;
170 struct inode *dat; 157 struct inode *dat;
171 int ret; 158 int ret;
172 159
173 if (key > NILFS_DIRECT_KEY_MAX || 160 if (key > NILFS_DIRECT_KEY_MAX ||
174 nilfs_direct_get_ptr(direct, key) == NILFS_BMAP_INVALID_PTR) 161 nilfs_direct_get_ptr(bmap, key) == NILFS_BMAP_INVALID_PTR)
175 return -ENOENT; 162 return -ENOENT;
176 163
177 dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL; 164 dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL;
178 req.bpr_ptr = nilfs_direct_get_ptr(direct, key); 165 req.bpr_ptr = nilfs_direct_get_ptr(bmap, key);
179 166
180 ret = nilfs_bmap_prepare_end_ptr(bmap, &req, dat); 167 ret = nilfs_bmap_prepare_end_ptr(bmap, &req, dat);
181 if (!ret) { 168 if (!ret) {
182 nilfs_bmap_commit_end_ptr(bmap, &req, dat); 169 nilfs_bmap_commit_end_ptr(bmap, &req, dat);
183 nilfs_direct_set_ptr(direct, key, NILFS_BMAP_INVALID_PTR); 170 nilfs_direct_set_ptr(bmap, key, NILFS_BMAP_INVALID_PTR);
184 nilfs_bmap_sub_blocks(bmap, 1); 171 nilfs_bmap_sub_blocks(bmap, 1);
185 } 172 }
186 return ret; 173 return ret;
187} 174}
188 175
189static int nilfs_direct_last_key(const struct nilfs_bmap *bmap, __u64 *keyp) 176static int nilfs_direct_last_key(const struct nilfs_bmap *direct, __u64 *keyp)
190{ 177{
191 struct nilfs_direct *direct;
192 __u64 key, lastkey; 178 __u64 key, lastkey;
193 179
194 direct = (struct nilfs_direct *)bmap;
195 lastkey = NILFS_DIRECT_KEY_MAX + 1; 180 lastkey = NILFS_DIRECT_KEY_MAX + 1;
196 for (key = NILFS_DIRECT_KEY_MIN; key <= NILFS_DIRECT_KEY_MAX; key++) 181 for (key = NILFS_DIRECT_KEY_MIN; key <= NILFS_DIRECT_KEY_MAX; key++)
197 if (nilfs_direct_get_ptr(direct, key) != 182 if (nilfs_direct_get_ptr(direct, key) !=
@@ -211,15 +196,13 @@ static int nilfs_direct_check_insert(const struct nilfs_bmap *bmap, __u64 key)
211 return key > NILFS_DIRECT_KEY_MAX; 196 return key > NILFS_DIRECT_KEY_MAX;
212} 197}
213 198
214static int nilfs_direct_gather_data(struct nilfs_bmap *bmap, 199static int nilfs_direct_gather_data(struct nilfs_bmap *direct,
215 __u64 *keys, __u64 *ptrs, int nitems) 200 __u64 *keys, __u64 *ptrs, int nitems)
216{ 201{
217 struct nilfs_direct *direct;
218 __u64 key; 202 __u64 key;
219 __u64 ptr; 203 __u64 ptr;
220 int n; 204 int n;
221 205
222 direct = (struct nilfs_direct *)bmap;
223 if (nitems > NILFS_DIRECT_NBLOCKS) 206 if (nitems > NILFS_DIRECT_NBLOCKS)
224 nitems = NILFS_DIRECT_NBLOCKS; 207 nitems = NILFS_DIRECT_NBLOCKS;
225 n = 0; 208 n = 0;
@@ -237,7 +220,6 @@ static int nilfs_direct_gather_data(struct nilfs_bmap *bmap,
237int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap, 220int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap,
238 __u64 key, __u64 *keys, __u64 *ptrs, int n) 221 __u64 key, __u64 *keys, __u64 *ptrs, int n)
239{ 222{
240 struct nilfs_direct *direct;
241 __le64 *dptrs; 223 __le64 *dptrs;
242 int ret, i, j; 224 int ret, i, j;
243 225
@@ -253,12 +235,11 @@ int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap,
253 bmap->b_ops->bop_clear(bmap); 235 bmap->b_ops->bop_clear(bmap);
254 236
255 /* convert */ 237 /* convert */
256 direct = (struct nilfs_direct *)bmap; 238 dptrs = nilfs_direct_dptrs(bmap);
257 dptrs = nilfs_direct_dptrs(direct);
258 for (i = 0, j = 0; i < NILFS_DIRECT_NBLOCKS; i++) { 239 for (i = 0, j = 0; i < NILFS_DIRECT_NBLOCKS; i++) {
259 if ((j < n) && (i == keys[j])) { 240 if ((j < n) && (i == keys[j])) {
260 dptrs[i] = (i != key) ? 241 dptrs[i] = (i != key) ?
261 nilfs_bmap_ptr_to_dptr(ptrs[j]) : 242 cpu_to_le64(ptrs[j]) :
262 NILFS_BMAP_INVALID_PTR; 243 NILFS_BMAP_INVALID_PTR;
263 j++; 244 j++;
264 } else 245 } else
@@ -269,10 +250,9 @@ int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap,
269 return 0; 250 return 0;
270} 251}
271 252
272static int nilfs_direct_propagate(const struct nilfs_bmap *bmap, 253static int nilfs_direct_propagate(struct nilfs_bmap *bmap,
273 struct buffer_head *bh) 254 struct buffer_head *bh)
274{ 255{
275 struct nilfs_direct *direct = (struct nilfs_direct *)bmap;
276 struct nilfs_palloc_req oldreq, newreq; 256 struct nilfs_palloc_req oldreq, newreq;
277 struct inode *dat; 257 struct inode *dat;
278 __u64 key; 258 __u64 key;
@@ -284,7 +264,7 @@ static int nilfs_direct_propagate(const struct nilfs_bmap *bmap,
284 264
285 dat = nilfs_bmap_get_dat(bmap); 265 dat = nilfs_bmap_get_dat(bmap);
286 key = nilfs_bmap_data_get_key(bmap, bh); 266 key = nilfs_bmap_data_get_key(bmap, bh);
287 ptr = nilfs_direct_get_ptr(direct, key); 267 ptr = nilfs_direct_get_ptr(bmap, key);
288 if (!buffer_nilfs_volatile(bh)) { 268 if (!buffer_nilfs_volatile(bh)) {
289 oldreq.pr_entry_nr = ptr; 269 oldreq.pr_entry_nr = ptr;
290 newreq.pr_entry_nr = ptr; 270 newreq.pr_entry_nr = ptr;
@@ -294,20 +274,20 @@ static int nilfs_direct_propagate(const struct nilfs_bmap *bmap,
294 nilfs_dat_commit_update(dat, &oldreq, &newreq, 274 nilfs_dat_commit_update(dat, &oldreq, &newreq,
295 bmap->b_ptr_type == NILFS_BMAP_PTR_VS); 275 bmap->b_ptr_type == NILFS_BMAP_PTR_VS);
296 set_buffer_nilfs_volatile(bh); 276 set_buffer_nilfs_volatile(bh);
297 nilfs_direct_set_ptr(direct, key, newreq.pr_entry_nr); 277 nilfs_direct_set_ptr(bmap, key, newreq.pr_entry_nr);
298 } else 278 } else
299 ret = nilfs_dat_mark_dirty(dat, ptr); 279 ret = nilfs_dat_mark_dirty(dat, ptr);
300 280
301 return ret; 281 return ret;
302} 282}
303 283
304static int nilfs_direct_assign_v(struct nilfs_direct *direct, 284static int nilfs_direct_assign_v(struct nilfs_bmap *direct,
305 __u64 key, __u64 ptr, 285 __u64 key, __u64 ptr,
306 struct buffer_head **bh, 286 struct buffer_head **bh,
307 sector_t blocknr, 287 sector_t blocknr,
308 union nilfs_binfo *binfo) 288 union nilfs_binfo *binfo)
309{ 289{
310 struct inode *dat = nilfs_bmap_get_dat(&direct->d_bmap); 290 struct inode *dat = nilfs_bmap_get_dat(direct);
311 union nilfs_bmap_ptr_req req; 291 union nilfs_bmap_ptr_req req;
312 int ret; 292 int ret;
313 293
@@ -315,13 +295,13 @@ static int nilfs_direct_assign_v(struct nilfs_direct *direct,
315 ret = nilfs_dat_prepare_start(dat, &req.bpr_req); 295 ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
316 if (!ret) { 296 if (!ret) {
317 nilfs_dat_commit_start(dat, &req.bpr_req, blocknr); 297 nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
318 binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr); 298 binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
319 binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key); 299 binfo->bi_v.bi_blkoff = cpu_to_le64(key);
320 } 300 }
321 return ret; 301 return ret;
322} 302}
323 303
324static int nilfs_direct_assign_p(struct nilfs_direct *direct, 304static int nilfs_direct_assign_p(struct nilfs_bmap *direct,
325 __u64 key, __u64 ptr, 305 __u64 key, __u64 ptr,
326 struct buffer_head **bh, 306 struct buffer_head **bh,
327 sector_t blocknr, 307 sector_t blocknr,
@@ -329,7 +309,7 @@ static int nilfs_direct_assign_p(struct nilfs_direct *direct,
329{ 309{
330 nilfs_direct_set_ptr(direct, key, blocknr); 310 nilfs_direct_set_ptr(direct, key, blocknr);
331 311
332 binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key); 312 binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
333 binfo->bi_dat.bi_level = 0; 313 binfo->bi_dat.bi_level = 0;
334 314
335 return 0; 315 return 0;
@@ -340,18 +320,16 @@ static int nilfs_direct_assign(struct nilfs_bmap *bmap,
340 sector_t blocknr, 320 sector_t blocknr,
341 union nilfs_binfo *binfo) 321 union nilfs_binfo *binfo)
342{ 322{
343 struct nilfs_direct *direct;
344 __u64 key; 323 __u64 key;
345 __u64 ptr; 324 __u64 ptr;
346 325
347 direct = (struct nilfs_direct *)bmap;
348 key = nilfs_bmap_data_get_key(bmap, *bh); 326 key = nilfs_bmap_data_get_key(bmap, *bh);
349 if (unlikely(key > NILFS_DIRECT_KEY_MAX)) { 327 if (unlikely(key > NILFS_DIRECT_KEY_MAX)) {
350 printk(KERN_CRIT "%s: invalid key: %llu\n", __func__, 328 printk(KERN_CRIT "%s: invalid key: %llu\n", __func__,
351 (unsigned long long)key); 329 (unsigned long long)key);
352 return -EINVAL; 330 return -EINVAL;
353 } 331 }
354 ptr = nilfs_direct_get_ptr(direct, key); 332 ptr = nilfs_direct_get_ptr(bmap, key);
355 if (unlikely(ptr == NILFS_BMAP_INVALID_PTR)) { 333 if (unlikely(ptr == NILFS_BMAP_INVALID_PTR)) {
356 printk(KERN_CRIT "%s: invalid pointer: %llu\n", __func__, 334 printk(KERN_CRIT "%s: invalid pointer: %llu\n", __func__,
357 (unsigned long long)ptr); 335 (unsigned long long)ptr);
@@ -359,8 +337,8 @@ static int nilfs_direct_assign(struct nilfs_bmap *bmap,
359 } 337 }
360 338
361 return NILFS_BMAP_USE_VBN(bmap) ? 339 return NILFS_BMAP_USE_VBN(bmap) ?
362 nilfs_direct_assign_v(direct, key, ptr, bh, blocknr, binfo) : 340 nilfs_direct_assign_v(bmap, key, ptr, bh, blocknr, binfo) :
363 nilfs_direct_assign_p(direct, key, ptr, bh, blocknr, binfo); 341 nilfs_direct_assign_p(bmap, key, ptr, bh, blocknr, binfo);
364} 342}
365 343
366static const struct nilfs_bmap_operations nilfs_direct_ops = { 344static const struct nilfs_bmap_operations nilfs_direct_ops = {
diff --git a/fs/nilfs2/direct.h b/fs/nilfs2/direct.h
index a5ffd66e25d0..dc643de20a25 100644
--- a/fs/nilfs2/direct.h
+++ b/fs/nilfs2/direct.h
@@ -28,8 +28,6 @@
28#include "bmap.h" 28#include "bmap.h"
29 29
30 30
31struct nilfs_direct;
32
33/** 31/**
34 * struct nilfs_direct_node - direct node 32 * struct nilfs_direct_node - direct node
35 * @dn_flags: flags 33 * @dn_flags: flags
@@ -40,15 +38,6 @@ struct nilfs_direct_node {
40 __u8 pad[7]; 38 __u8 pad[7];
41}; 39};
42 40
43/**
44 * struct nilfs_direct - direct mapping
45 * @d_bmap: bmap structure
46 */
47struct nilfs_direct {
48 struct nilfs_bmap d_bmap;
49};
50
51
52#define NILFS_DIRECT_NBLOCKS (NILFS_BMAP_SIZE / sizeof(__le64) - 1) 41#define NILFS_DIRECT_NBLOCKS (NILFS_BMAP_SIZE / sizeof(__le64) - 1)
53#define NILFS_DIRECT_KEY_MIN 0 42#define NILFS_DIRECT_KEY_MIN 0
54#define NILFS_DIRECT_KEY_MAX (NILFS_DIRECT_NBLOCKS - 1) 43#define NILFS_DIRECT_KEY_MAX (NILFS_DIRECT_NBLOCKS - 1)
diff --git a/fs/nilfs2/gcdat.c b/fs/nilfs2/gcdat.c
index dd5f7e0a95f6..84a45d1d5464 100644
--- a/fs/nilfs2/gcdat.c
+++ b/fs/nilfs2/gcdat.c
@@ -78,7 +78,7 @@ void nilfs_clear_gcdat_inode(struct the_nilfs *nilfs)
78 struct inode *gcdat = nilfs->ns_gc_dat; 78 struct inode *gcdat = nilfs->ns_gc_dat;
79 struct nilfs_inode_info *gii = NILFS_I(gcdat); 79 struct nilfs_inode_info *gii = NILFS_I(gcdat);
80 80
81 gcdat->i_state = I_CLEAR; 81 gcdat->i_state = I_FREEING | I_CLEAR;
82 gii->i_flags = 0; 82 gii->i_flags = 0;
83 83
84 nilfs_palloc_clear_cache(gcdat); 84 nilfs_palloc_clear_cache(gcdat);
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 145f03cd7d3e..bed3a783129b 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -48,6 +48,8 @@
48#include <linux/slab.h> 48#include <linux/slab.h>
49#include <linux/swap.h> 49#include <linux/swap.h>
50#include "nilfs.h" 50#include "nilfs.h"
51#include "btree.h"
52#include "btnode.h"
51#include "page.h" 53#include "page.h"
52#include "mdt.h" 54#include "mdt.h"
53#include "dat.h" 55#include "dat.h"
@@ -149,8 +151,10 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
149int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn, 151int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn,
150 __u64 vbn, struct buffer_head **out_bh) 152 __u64 vbn, struct buffer_head **out_bh)
151{ 153{
152 int ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache, 154 int ret;
153 vbn ? : pbn, pbn, out_bh); 155
156 ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache,
157 vbn ? : pbn, pbn, READ, out_bh, &pbn);
154 if (ret == -EEXIST) /* internal code (cache hit) */ 158 if (ret == -EEXIST) /* internal code (cache hit) */
155 ret = 0; 159 ret = 0;
156 return ret; 160 return ret;
@@ -164,10 +168,15 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh)
164 if (buffer_dirty(bh)) 168 if (buffer_dirty(bh))
165 return -EEXIST; 169 return -EEXIST;
166 170
167 if (buffer_nilfs_node(bh)) 171 if (buffer_nilfs_node(bh)) {
172 if (nilfs_btree_broken_node_block(bh)) {
173 clear_buffer_uptodate(bh);
174 return -EIO;
175 }
168 nilfs_btnode_mark_dirty(bh); 176 nilfs_btnode_mark_dirty(bh);
169 else 177 } else {
170 nilfs_mdt_mark_buffer_dirty(bh); 178 nilfs_mdt_mark_buffer_dirty(bh);
179 }
171 return 0; 180 return 0;
172} 181}
173 182
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 39e038ac8fcb..eccb2f2e2315 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -27,6 +27,7 @@
27#include <linux/writeback.h> 27#include <linux/writeback.h>
28#include <linux/uio.h> 28#include <linux/uio.h>
29#include "nilfs.h" 29#include "nilfs.h"
30#include "btnode.h"
30#include "segment.h" 31#include "segment.h"
31#include "page.h" 32#include "page.h"
32#include "mdt.h" 33#include "mdt.h"
@@ -197,11 +198,15 @@ static int nilfs_write_begin(struct file *file, struct address_space *mapping,
197 if (unlikely(err)) 198 if (unlikely(err))
198 return err; 199 return err;
199 200
200 *pagep = NULL; 201 err = block_write_begin(mapping, pos, len, flags, pagep,
201 err = block_write_begin(file, mapping, pos, len, flags, pagep, 202 nilfs_get_block);
202 fsdata, nilfs_get_block); 203 if (unlikely(err)) {
203 if (unlikely(err)) 204 loff_t isize = mapping->host->i_size;
205 if (pos + len > isize)
206 vmtruncate(mapping->host, isize);
207
204 nilfs_transaction_abort(inode->i_sb); 208 nilfs_transaction_abort(inode->i_sb);
209 }
205 return err; 210 return err;
206} 211}
207 212
@@ -237,6 +242,19 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
237 /* Needs synchronization with the cleaner */ 242 /* Needs synchronization with the cleaner */
238 size = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 243 size = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
239 offset, nr_segs, nilfs_get_block, NULL); 244 offset, nr_segs, nilfs_get_block, NULL);
245
246 /*
247 * In case of error extending write may have instantiated a few
248 * blocks outside i_size. Trim these off again.
249 */
250 if (unlikely((rw & WRITE) && size < 0)) {
251 loff_t isize = i_size_read(inode);
252 loff_t end = offset + iov_length(iov, nr_segs);
253
254 if (end > isize)
255 vmtruncate(inode, isize);
256 }
257
240 return size; 258 return size;
241} 259}
242 260
@@ -337,7 +355,6 @@ void nilfs_free_inode(struct inode *inode)
337 struct super_block *sb = inode->i_sb; 355 struct super_block *sb = inode->i_sb;
338 struct nilfs_sb_info *sbi = NILFS_SB(sb); 356 struct nilfs_sb_info *sbi = NILFS_SB(sb);
339 357
340 clear_inode(inode);
341 /* XXX: check error code? Is there any thing I can do? */ 358 /* XXX: check error code? Is there any thing I can do? */
342 (void) nilfs_ifile_delete_inode(sbi->s_ifile, inode->i_ino); 359 (void) nilfs_ifile_delete_inode(sbi->s_ifile, inode->i_ino);
343 atomic_dec(&sbi->s_inodes_count); 360 atomic_dec(&sbi->s_inodes_count);
@@ -597,16 +614,34 @@ void nilfs_truncate(struct inode *inode)
597 But truncate has no return value. */ 614 But truncate has no return value. */
598} 615}
599 616
600void nilfs_delete_inode(struct inode *inode) 617static void nilfs_clear_inode(struct inode *inode)
618{
619 struct nilfs_inode_info *ii = NILFS_I(inode);
620
621 /*
622 * Free resources allocated in nilfs_read_inode(), here.
623 */
624 BUG_ON(!list_empty(&ii->i_dirty));
625 brelse(ii->i_bh);
626 ii->i_bh = NULL;
627
628 if (test_bit(NILFS_I_BMAP, &ii->i_state))
629 nilfs_bmap_clear(ii->i_bmap);
630
631 nilfs_btnode_cache_clear(&ii->i_btnode_cache);
632}
633
634void nilfs_evict_inode(struct inode *inode)
601{ 635{
602 struct nilfs_transaction_info ti; 636 struct nilfs_transaction_info ti;
603 struct super_block *sb = inode->i_sb; 637 struct super_block *sb = inode->i_sb;
604 struct nilfs_inode_info *ii = NILFS_I(inode); 638 struct nilfs_inode_info *ii = NILFS_I(inode);
605 639
606 if (unlikely(is_bad_inode(inode))) { 640 if (inode->i_nlink || unlikely(is_bad_inode(inode))) {
607 if (inode->i_data.nrpages) 641 if (inode->i_data.nrpages)
608 truncate_inode_pages(&inode->i_data, 0); 642 truncate_inode_pages(&inode->i_data, 0);
609 clear_inode(inode); 643 end_writeback(inode);
644 nilfs_clear_inode(inode);
610 return; 645 return;
611 } 646 }
612 nilfs_transaction_begin(sb, &ti, 0); /* never fails */ 647 nilfs_transaction_begin(sb, &ti, 0); /* never fails */
@@ -616,6 +651,8 @@ void nilfs_delete_inode(struct inode *inode)
616 651
617 nilfs_truncate_bmap(ii, 0); 652 nilfs_truncate_bmap(ii, 0);
618 nilfs_mark_inode_dirty(inode); 653 nilfs_mark_inode_dirty(inode);
654 end_writeback(inode);
655 nilfs_clear_inode(inode);
619 nilfs_free_inode(inode); 656 nilfs_free_inode(inode);
620 /* nilfs_free_inode() marks inode buffer dirty */ 657 /* nilfs_free_inode() marks inode buffer dirty */
621 if (IS_SYNC(inode)) 658 if (IS_SYNC(inode))
@@ -639,14 +676,27 @@ int nilfs_setattr(struct dentry *dentry, struct iattr *iattr)
639 err = nilfs_transaction_begin(sb, &ti, 0); 676 err = nilfs_transaction_begin(sb, &ti, 0);
640 if (unlikely(err)) 677 if (unlikely(err))
641 return err; 678 return err;
642 err = inode_setattr(inode, iattr); 679
643 if (!err && (iattr->ia_valid & ATTR_MODE)) 680 if ((iattr->ia_valid & ATTR_SIZE) &&
681 iattr->ia_size != i_size_read(inode)) {
682 err = vmtruncate(inode, iattr->ia_size);
683 if (unlikely(err))
684 goto out_err;
685 }
686
687 setattr_copy(inode, iattr);
688 mark_inode_dirty(inode);
689
690 if (iattr->ia_valid & ATTR_MODE) {
644 err = nilfs_acl_chmod(inode); 691 err = nilfs_acl_chmod(inode);
645 if (likely(!err)) 692 if (unlikely(err))
646 err = nilfs_transaction_commit(sb); 693 goto out_err;
647 else 694 }
648 nilfs_transaction_abort(sb); 695
696 return nilfs_transaction_commit(sb);
649 697
698out_err:
699 nilfs_transaction_abort(sb);
650 return err; 700 return err;
651} 701}
652 702
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 024be8c35bb6..d01aff4957d9 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -28,6 +28,7 @@
28#include <linux/swap.h> 28#include <linux/swap.h>
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include "nilfs.h" 30#include "nilfs.h"
31#include "btnode.h"
31#include "segment.h" 32#include "segment.h"
32#include "page.h" 33#include "page.h"
33#include "mdt.h" 34#include "mdt.h"
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 47d6d7928122..d3d54046e5f8 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -32,7 +32,6 @@
32#include "the_nilfs.h" 32#include "the_nilfs.h"
33#include "sb.h" 33#include "sb.h"
34#include "bmap.h" 34#include "bmap.h"
35#include "bmap_union.h"
36 35
37/* 36/*
38 * nilfs inode data in memory 37 * nilfs inode data in memory
@@ -41,7 +40,7 @@ struct nilfs_inode_info {
41 __u32 i_flags; 40 __u32 i_flags;
42 unsigned long i_state; /* Dynamic state flags */ 41 unsigned long i_state; /* Dynamic state flags */
43 struct nilfs_bmap *i_bmap; 42 struct nilfs_bmap *i_bmap;
44 union nilfs_bmap_union i_bmap_union; 43 struct nilfs_bmap i_bmap_data;
45 __u64 i_xattr; /* sector_t ??? */ 44 __u64 i_xattr; /* sector_t ??? */
46 __u32 i_dir_start_lookup; 45 __u32 i_dir_start_lookup;
47 __u64 i_cno; /* check point number for GC inode */ 46 __u64 i_cno; /* check point number for GC inode */
@@ -71,9 +70,7 @@ static inline struct nilfs_inode_info *NILFS_I(const struct inode *inode)
71static inline struct nilfs_inode_info * 70static inline struct nilfs_inode_info *
72NILFS_BMAP_I(const struct nilfs_bmap *bmap) 71NILFS_BMAP_I(const struct nilfs_bmap *bmap)
73{ 72{
74 return container_of((union nilfs_bmap_union *)bmap, 73 return container_of(bmap, struct nilfs_inode_info, i_bmap_data);
75 struct nilfs_inode_info,
76 i_bmap_union);
77} 74}
78 75
79static inline struct inode *NILFS_BTNC_I(struct address_space *btnc) 76static inline struct inode *NILFS_BTNC_I(struct address_space *btnc)
@@ -107,6 +104,14 @@ enum {
107}; 104};
108 105
109/* 106/*
107 * commit flags for nilfs_commit_super and nilfs_sync_super
108 */
109enum {
110 NILFS_SB_COMMIT = 0, /* Commit a super block alternately */
111 NILFS_SB_COMMIT_ALL /* Commit both super blocks */
112};
113
114/*
110 * Macros to check inode numbers 115 * Macros to check inode numbers
111 */ 116 */
112#define NILFS_MDT_INO_BITS \ 117#define NILFS_MDT_INO_BITS \
@@ -245,7 +250,7 @@ extern void nilfs_write_inode_common(struct inode *, struct nilfs_inode *, int);
245extern struct inode *nilfs_iget(struct super_block *, unsigned long); 250extern struct inode *nilfs_iget(struct super_block *, unsigned long);
246extern void nilfs_update_inode(struct inode *, struct buffer_head *); 251extern void nilfs_update_inode(struct inode *, struct buffer_head *);
247extern void nilfs_truncate(struct inode *); 252extern void nilfs_truncate(struct inode *);
248extern void nilfs_delete_inode(struct inode *); 253extern void nilfs_evict_inode(struct inode *);
249extern int nilfs_setattr(struct dentry *, struct iattr *); 254extern int nilfs_setattr(struct dentry *, struct iattr *);
250extern int nilfs_load_inode_block(struct nilfs_sb_info *, struct inode *, 255extern int nilfs_load_inode_block(struct nilfs_sb_info *, struct inode *,
251 struct buffer_head **); 256 struct buffer_head **);
@@ -270,7 +275,14 @@ extern struct nilfs_super_block *
270nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **); 275nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **);
271extern int nilfs_store_magic_and_option(struct super_block *, 276extern int nilfs_store_magic_and_option(struct super_block *,
272 struct nilfs_super_block *, char *); 277 struct nilfs_super_block *, char *);
278extern int nilfs_check_feature_compatibility(struct super_block *,
279 struct nilfs_super_block *);
280extern void nilfs_set_log_cursor(struct nilfs_super_block *,
281 struct the_nilfs *);
282extern struct nilfs_super_block **nilfs_prepare_super(struct nilfs_sb_info *,
283 int flip);
273extern int nilfs_commit_super(struct nilfs_sb_info *, int); 284extern int nilfs_commit_super(struct nilfs_sb_info *, int);
285extern int nilfs_cleanup_super(struct nilfs_sb_info *);
274extern int nilfs_attach_checkpoint(struct nilfs_sb_info *, __u64); 286extern int nilfs_attach_checkpoint(struct nilfs_sb_info *, __u64);
275extern void nilfs_detach_checkpoint(struct nilfs_sb_info *); 287extern void nilfs_detach_checkpoint(struct nilfs_sb_info *);
276 288
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 8de3e1e48130..aab11db2cb08 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -37,7 +37,8 @@
37 37
38#define NILFS_BUFFER_INHERENT_BITS \ 38#define NILFS_BUFFER_INHERENT_BITS \
39 ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \ 39 ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \
40 (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Allocated)) 40 (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Allocated) | \
41 (1UL << BH_NILFS_Checked))
41 42
42static struct buffer_head * 43static struct buffer_head *
43__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index, 44__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
@@ -129,6 +130,7 @@ void nilfs_forget_buffer(struct buffer_head *bh)
129 130
130 lock_buffer(bh); 131 lock_buffer(bh);
131 clear_buffer_nilfs_volatile(bh); 132 clear_buffer_nilfs_volatile(bh);
133 clear_buffer_nilfs_checked(bh);
132 clear_buffer_dirty(bh); 134 clear_buffer_dirty(bh);
133 if (nilfs_page_buffers_clean(page)) 135 if (nilfs_page_buffers_clean(page))
134 __nilfs_clear_page_dirty(page); 136 __nilfs_clear_page_dirty(page);
@@ -480,6 +482,7 @@ void nilfs_clear_dirty_pages(struct address_space *mapping)
480 lock_buffer(bh); 482 lock_buffer(bh);
481 clear_buffer_dirty(bh); 483 clear_buffer_dirty(bh);
482 clear_buffer_nilfs_volatile(bh); 484 clear_buffer_nilfs_volatile(bh);
485 clear_buffer_nilfs_checked(bh);
483 clear_buffer_uptodate(bh); 486 clear_buffer_uptodate(bh);
484 clear_buffer_mapped(bh); 487 clear_buffer_mapped(bh);
485 unlock_buffer(bh); 488 unlock_buffer(bh);
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index 8abca4d1c1f8..f53d8da41ed7 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -34,11 +34,13 @@ enum {
34 BH_NILFS_Allocated = BH_PrivateStart, 34 BH_NILFS_Allocated = BH_PrivateStart,
35 BH_NILFS_Node, 35 BH_NILFS_Node,
36 BH_NILFS_Volatile, 36 BH_NILFS_Volatile,
37 BH_NILFS_Checked,
37}; 38};
38 39
39BUFFER_FNS(NILFS_Allocated, nilfs_allocated) /* nilfs private buffers */ 40BUFFER_FNS(NILFS_Allocated, nilfs_allocated) /* nilfs private buffers */
40BUFFER_FNS(NILFS_Node, nilfs_node) /* nilfs node buffers */ 41BUFFER_FNS(NILFS_Node, nilfs_node) /* nilfs node buffers */
41BUFFER_FNS(NILFS_Volatile, nilfs_volatile) 42BUFFER_FNS(NILFS_Volatile, nilfs_volatile)
43BUFFER_FNS(NILFS_Checked, nilfs_checked) /* buffer is verified */
42 44
43 45
44void nilfs_mark_buffer_dirty(struct buffer_head *bh); 46void nilfs_mark_buffer_dirty(struct buffer_head *bh);
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index bae2a516b4ee..d0c35ef39f6a 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -91,27 +91,9 @@ static int nilfs_warn_segment_error(int err)
91 return -EINVAL; 91 return -EINVAL;
92} 92}
93 93
94static void store_segsum_info(struct nilfs_segsum_info *ssi,
95 struct nilfs_segment_summary *sum,
96 unsigned int blocksize)
97{
98 ssi->flags = le16_to_cpu(sum->ss_flags);
99 ssi->seg_seq = le64_to_cpu(sum->ss_seq);
100 ssi->ctime = le64_to_cpu(sum->ss_create);
101 ssi->next = le64_to_cpu(sum->ss_next);
102 ssi->nblocks = le32_to_cpu(sum->ss_nblocks);
103 ssi->nfinfo = le32_to_cpu(sum->ss_nfinfo);
104 ssi->sumbytes = le32_to_cpu(sum->ss_sumbytes);
105
106 ssi->nsumblk = DIV_ROUND_UP(ssi->sumbytes, blocksize);
107 ssi->nfileblk = ssi->nblocks - ssi->nsumblk - !!NILFS_SEG_HAS_SR(ssi);
108
109 /* need to verify ->ss_bytes field if read ->ss_cno */
110}
111
112/** 94/**
113 * calc_crc_cont - check CRC of blocks continuously 95 * nilfs_compute_checksum - compute checksum of blocks continuously
114 * @sbi: nilfs_sb_info 96 * @nilfs: nilfs object
115 * @bhs: buffer head of start block 97 * @bhs: buffer head of start block
116 * @sum: place to store result 98 * @sum: place to store result
117 * @offset: offset bytes in the first block 99 * @offset: offset bytes in the first block
@@ -119,23 +101,25 @@ static void store_segsum_info(struct nilfs_segsum_info *ssi,
119 * @start: DBN of start block 101 * @start: DBN of start block
120 * @nblock: number of blocks to be checked 102 * @nblock: number of blocks to be checked
121 */ 103 */
122static int calc_crc_cont(struct nilfs_sb_info *sbi, struct buffer_head *bhs, 104static int nilfs_compute_checksum(struct the_nilfs *nilfs,
123 u32 *sum, unsigned long offset, u64 check_bytes, 105 struct buffer_head *bhs, u32 *sum,
124 sector_t start, unsigned long nblock) 106 unsigned long offset, u64 check_bytes,
107 sector_t start, unsigned long nblock)
125{ 108{
126 unsigned long blocksize = sbi->s_super->s_blocksize; 109 unsigned int blocksize = nilfs->ns_blocksize;
127 unsigned long size; 110 unsigned long size;
128 u32 crc; 111 u32 crc;
129 112
130 BUG_ON(offset >= blocksize); 113 BUG_ON(offset >= blocksize);
131 check_bytes -= offset; 114 check_bytes -= offset;
132 size = min_t(u64, check_bytes, blocksize - offset); 115 size = min_t(u64, check_bytes, blocksize - offset);
133 crc = crc32_le(sbi->s_nilfs->ns_crc_seed, 116 crc = crc32_le(nilfs->ns_crc_seed,
134 (unsigned char *)bhs->b_data + offset, size); 117 (unsigned char *)bhs->b_data + offset, size);
135 if (--nblock > 0) { 118 if (--nblock > 0) {
136 do { 119 do {
137 struct buffer_head *bh 120 struct buffer_head *bh;
138 = sb_bread(sbi->s_super, ++start); 121
122 bh = __bread(nilfs->ns_bdev, ++start, blocksize);
139 if (!bh) 123 if (!bh)
140 return -EIO; 124 return -EIO;
141 check_bytes -= size; 125 check_bytes -= size;
@@ -150,12 +134,12 @@ static int calc_crc_cont(struct nilfs_sb_info *sbi, struct buffer_head *bhs,
150 134
151/** 135/**
152 * nilfs_read_super_root_block - read super root block 136 * nilfs_read_super_root_block - read super root block
153 * @sb: super_block 137 * @nilfs: nilfs object
154 * @sr_block: disk block number of the super root block 138 * @sr_block: disk block number of the super root block
155 * @pbh: address of a buffer_head pointer to return super root buffer 139 * @pbh: address of a buffer_head pointer to return super root buffer
156 * @check: CRC check flag 140 * @check: CRC check flag
157 */ 141 */
158int nilfs_read_super_root_block(struct super_block *sb, sector_t sr_block, 142int nilfs_read_super_root_block(struct the_nilfs *nilfs, sector_t sr_block,
159 struct buffer_head **pbh, int check) 143 struct buffer_head **pbh, int check)
160{ 144{
161 struct buffer_head *bh_sr; 145 struct buffer_head *bh_sr;
@@ -164,7 +148,7 @@ int nilfs_read_super_root_block(struct super_block *sb, sector_t sr_block,
164 int ret; 148 int ret;
165 149
166 *pbh = NULL; 150 *pbh = NULL;
167 bh_sr = sb_bread(sb, sr_block); 151 bh_sr = __bread(nilfs->ns_bdev, sr_block, nilfs->ns_blocksize);
168 if (unlikely(!bh_sr)) { 152 if (unlikely(!bh_sr)) {
169 ret = NILFS_SEG_FAIL_IO; 153 ret = NILFS_SEG_FAIL_IO;
170 goto failed; 154 goto failed;
@@ -174,12 +158,13 @@ int nilfs_read_super_root_block(struct super_block *sb, sector_t sr_block,
174 if (check) { 158 if (check) {
175 unsigned bytes = le16_to_cpu(sr->sr_bytes); 159 unsigned bytes = le16_to_cpu(sr->sr_bytes);
176 160
177 if (bytes == 0 || bytes > sb->s_blocksize) { 161 if (bytes == 0 || bytes > nilfs->ns_blocksize) {
178 ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT; 162 ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT;
179 goto failed_bh; 163 goto failed_bh;
180 } 164 }
181 if (calc_crc_cont(NILFS_SB(sb), bh_sr, &crc, 165 if (nilfs_compute_checksum(
182 sizeof(sr->sr_sum), bytes, sr_block, 1)) { 166 nilfs, bh_sr, &crc, sizeof(sr->sr_sum), bytes,
167 sr_block, 1)) {
183 ret = NILFS_SEG_FAIL_IO; 168 ret = NILFS_SEG_FAIL_IO;
184 goto failed_bh; 169 goto failed_bh;
185 } 170 }
@@ -199,64 +184,76 @@ int nilfs_read_super_root_block(struct super_block *sb, sector_t sr_block,
199} 184}
200 185
201/** 186/**
202 * load_segment_summary - read segment summary of the specified partial segment 187 * nilfs_read_log_header - read summary header of the specified log
203 * @sbi: nilfs_sb_info 188 * @nilfs: nilfs object
204 * @pseg_start: start disk block number of partial segment 189 * @start_blocknr: start block number of the log
205 * @seg_seq: sequence number requested 190 * @sum: pointer to return segment summary structure
206 * @ssi: pointer to nilfs_segsum_info struct to store information
207 */ 191 */
208static int 192static struct buffer_head *
209load_segment_summary(struct nilfs_sb_info *sbi, sector_t pseg_start, 193nilfs_read_log_header(struct the_nilfs *nilfs, sector_t start_blocknr,
210 u64 seg_seq, struct nilfs_segsum_info *ssi) 194 struct nilfs_segment_summary **sum)
211{ 195{
212 struct buffer_head *bh_sum; 196 struct buffer_head *bh_sum;
213 struct nilfs_segment_summary *sum; 197
198 bh_sum = __bread(nilfs->ns_bdev, start_blocknr, nilfs->ns_blocksize);
199 if (bh_sum)
200 *sum = (struct nilfs_segment_summary *)bh_sum->b_data;
201 return bh_sum;
202}
203
204/**
205 * nilfs_validate_log - verify consistency of log
206 * @nilfs: nilfs object
207 * @seg_seq: sequence number of segment
208 * @bh_sum: buffer head of summary block
209 * @sum: segment summary struct
210 */
211static int nilfs_validate_log(struct the_nilfs *nilfs, u64 seg_seq,
212 struct buffer_head *bh_sum,
213 struct nilfs_segment_summary *sum)
214{
214 unsigned long nblock; 215 unsigned long nblock;
215 u32 crc; 216 u32 crc;
216 int ret = NILFS_SEG_FAIL_IO; 217 int ret;
217 218
218 bh_sum = sb_bread(sbi->s_super, pseg_start); 219 ret = NILFS_SEG_FAIL_MAGIC;
219 if (!bh_sum) 220 if (le32_to_cpu(sum->ss_magic) != NILFS_SEGSUM_MAGIC)
220 goto out; 221 goto out;
221 222
222 sum = (struct nilfs_segment_summary *)bh_sum->b_data; 223 ret = NILFS_SEG_FAIL_SEQ;
223 224 if (le64_to_cpu(sum->ss_seq) != seg_seq)
224 /* Check consistency of segment summary */ 225 goto out;
225 if (le32_to_cpu(sum->ss_magic) != NILFS_SEGSUM_MAGIC) {
226 ret = NILFS_SEG_FAIL_MAGIC;
227 goto failed;
228 }
229 store_segsum_info(ssi, sum, sbi->s_super->s_blocksize);
230 if (seg_seq != ssi->seg_seq) {
231 ret = NILFS_SEG_FAIL_SEQ;
232 goto failed;
233 }
234 226
235 nblock = ssi->nblocks; 227 nblock = le32_to_cpu(sum->ss_nblocks);
236 if (unlikely(nblock == 0 || 228 ret = NILFS_SEG_FAIL_CONSISTENCY;
237 nblock > sbi->s_nilfs->ns_blocks_per_segment)) { 229 if (unlikely(nblock == 0 || nblock > nilfs->ns_blocks_per_segment))
238 /* This limits the number of blocks read in the CRC check */ 230 /* This limits the number of blocks read in the CRC check */
239 ret = NILFS_SEG_FAIL_CONSISTENCY; 231 goto out;
240 goto failed; 232
241 } 233 ret = NILFS_SEG_FAIL_IO;
242 if (calc_crc_cont(sbi, bh_sum, &crc, sizeof(sum->ss_datasum), 234 if (nilfs_compute_checksum(nilfs, bh_sum, &crc, sizeof(sum->ss_datasum),
243 ((u64)nblock << sbi->s_super->s_blocksize_bits), 235 ((u64)nblock << nilfs->ns_blocksize_bits),
244 pseg_start, nblock)) { 236 bh_sum->b_blocknr, nblock))
245 ret = NILFS_SEG_FAIL_IO; 237 goto out;
246 goto failed; 238
247 } 239 ret = NILFS_SEG_FAIL_CHECKSUM_FULL;
248 if (crc == le32_to_cpu(sum->ss_datasum)) 240 if (crc != le32_to_cpu(sum->ss_datasum))
249 ret = 0; 241 goto out;
250 else 242 ret = 0;
251 ret = NILFS_SEG_FAIL_CHECKSUM_FULL; 243out:
252 failed:
253 brelse(bh_sum);
254 out:
255 return ret; 244 return ret;
256} 245}
257 246
258static void *segsum_get(struct super_block *sb, struct buffer_head **pbh, 247/**
259 unsigned int *offset, unsigned int bytes) 248 * nilfs_read_summary_info - read an item on summary blocks of a log
249 * @nilfs: nilfs object
250 * @pbh: the current buffer head on summary blocks [in, out]
251 * @offset: the current byte offset on summary blocks [in, out]
252 * @bytes: byte size of the item to be read
253 */
254static void *nilfs_read_summary_info(struct the_nilfs *nilfs,
255 struct buffer_head **pbh,
256 unsigned int *offset, unsigned int bytes)
260{ 257{
261 void *ptr; 258 void *ptr;
262 sector_t blocknr; 259 sector_t blocknr;
@@ -265,7 +262,8 @@ static void *segsum_get(struct super_block *sb, struct buffer_head **pbh,
265 if (bytes > (*pbh)->b_size - *offset) { 262 if (bytes > (*pbh)->b_size - *offset) {
266 blocknr = (*pbh)->b_blocknr; 263 blocknr = (*pbh)->b_blocknr;
267 brelse(*pbh); 264 brelse(*pbh);
268 *pbh = sb_bread(sb, blocknr + 1); 265 *pbh = __bread(nilfs->ns_bdev, blocknr + 1,
266 nilfs->ns_blocksize);
269 if (unlikely(!*pbh)) 267 if (unlikely(!*pbh))
270 return NULL; 268 return NULL;
271 *offset = 0; 269 *offset = 0;
@@ -275,9 +273,18 @@ static void *segsum_get(struct super_block *sb, struct buffer_head **pbh,
275 return ptr; 273 return ptr;
276} 274}
277 275
278static void segsum_skip(struct super_block *sb, struct buffer_head **pbh, 276/**
279 unsigned int *offset, unsigned int bytes, 277 * nilfs_skip_summary_info - skip items on summary blocks of a log
280 unsigned long count) 278 * @nilfs: nilfs object
279 * @pbh: the current buffer head on summary blocks [in, out]
280 * @offset: the current byte offset on summary blocks [in, out]
281 * @bytes: byte size of the item to be skipped
282 * @count: number of items to be skipped
283 */
284static void nilfs_skip_summary_info(struct the_nilfs *nilfs,
285 struct buffer_head **pbh,
286 unsigned int *offset, unsigned int bytes,
287 unsigned long count)
281{ 288{
282 unsigned int rest_item_in_current_block 289 unsigned int rest_item_in_current_block
283 = ((*pbh)->b_size - *offset) / bytes; 290 = ((*pbh)->b_size - *offset) / bytes;
@@ -294,36 +301,46 @@ static void segsum_skip(struct super_block *sb, struct buffer_head **pbh,
294 *offset = bytes * (count - (bcnt - 1) * nitem_per_block); 301 *offset = bytes * (count - (bcnt - 1) * nitem_per_block);
295 302
296 brelse(*pbh); 303 brelse(*pbh);
297 *pbh = sb_bread(sb, blocknr + bcnt); 304 *pbh = __bread(nilfs->ns_bdev, blocknr + bcnt,
305 nilfs->ns_blocksize);
298 } 306 }
299} 307}
300 308
301static int 309/**
302collect_blocks_from_segsum(struct nilfs_sb_info *sbi, sector_t sum_blocknr, 310 * nilfs_scan_dsync_log - get block information of a log written for data sync
303 struct nilfs_segsum_info *ssi, 311 * @nilfs: nilfs object
304 struct list_head *head) 312 * @start_blocknr: start block number of the log
313 * @sum: log summary information
314 * @head: list head to add nilfs_recovery_block struct
315 */
316static int nilfs_scan_dsync_log(struct the_nilfs *nilfs, sector_t start_blocknr,
317 struct nilfs_segment_summary *sum,
318 struct list_head *head)
305{ 319{
306 struct buffer_head *bh; 320 struct buffer_head *bh;
307 unsigned int offset; 321 unsigned int offset;
308 unsigned long nfinfo = ssi->nfinfo; 322 u32 nfinfo, sumbytes;
309 sector_t blocknr = sum_blocknr + ssi->nsumblk; 323 sector_t blocknr;
310 ino_t ino; 324 ino_t ino;
311 int err = -EIO; 325 int err = -EIO;
312 326
327 nfinfo = le32_to_cpu(sum->ss_nfinfo);
313 if (!nfinfo) 328 if (!nfinfo)
314 return 0; 329 return 0;
315 330
316 bh = sb_bread(sbi->s_super, sum_blocknr); 331 sumbytes = le32_to_cpu(sum->ss_sumbytes);
332 blocknr = start_blocknr + DIV_ROUND_UP(sumbytes, nilfs->ns_blocksize);
333 bh = __bread(nilfs->ns_bdev, start_blocknr, nilfs->ns_blocksize);
317 if (unlikely(!bh)) 334 if (unlikely(!bh))
318 goto out; 335 goto out;
319 336
320 offset = le16_to_cpu( 337 offset = le16_to_cpu(sum->ss_bytes);
321 ((struct nilfs_segment_summary *)bh->b_data)->ss_bytes);
322 for (;;) { 338 for (;;) {
323 unsigned long nblocks, ndatablk, nnodeblk; 339 unsigned long nblocks, ndatablk, nnodeblk;
324 struct nilfs_finfo *finfo; 340 struct nilfs_finfo *finfo;
325 341
326 finfo = segsum_get(sbi->s_super, &bh, &offset, sizeof(*finfo)); 342 finfo = nilfs_read_summary_info(nilfs, &bh, &offset,
343 sizeof(*finfo));
327 if (unlikely(!finfo)) 344 if (unlikely(!finfo))
328 goto out; 345 goto out;
329 346
@@ -336,8 +353,8 @@ collect_blocks_from_segsum(struct nilfs_sb_info *sbi, sector_t sum_blocknr,
336 struct nilfs_recovery_block *rb; 353 struct nilfs_recovery_block *rb;
337 struct nilfs_binfo_v *binfo; 354 struct nilfs_binfo_v *binfo;
338 355
339 binfo = segsum_get(sbi->s_super, &bh, &offset, 356 binfo = nilfs_read_summary_info(nilfs, &bh, &offset,
340 sizeof(*binfo)); 357 sizeof(*binfo));
341 if (unlikely(!binfo)) 358 if (unlikely(!binfo))
342 goto out; 359 goto out;
343 360
@@ -355,9 +372,9 @@ collect_blocks_from_segsum(struct nilfs_sb_info *sbi, sector_t sum_blocknr,
355 } 372 }
356 if (--nfinfo == 0) 373 if (--nfinfo == 0)
357 break; 374 break;
358 blocknr += nnodeblk; /* always 0 for the data sync segments */ 375 blocknr += nnodeblk; /* always 0 for data sync logs */
359 segsum_skip(sbi->s_super, &bh, &offset, sizeof(__le64), 376 nilfs_skip_summary_info(nilfs, &bh, &offset, sizeof(__le64),
360 nnodeblk); 377 nnodeblk);
361 if (unlikely(!bh)) 378 if (unlikely(!bh))
362 goto out; 379 goto out;
363 } 380 }
@@ -467,14 +484,14 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
467 return err; 484 return err;
468} 485}
469 486
470static int nilfs_recovery_copy_block(struct nilfs_sb_info *sbi, 487static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
471 struct nilfs_recovery_block *rb, 488 struct nilfs_recovery_block *rb,
472 struct page *page) 489 struct page *page)
473{ 490{
474 struct buffer_head *bh_org; 491 struct buffer_head *bh_org;
475 void *kaddr; 492 void *kaddr;
476 493
477 bh_org = sb_bread(sbi->s_super, rb->blocknr); 494 bh_org = __bread(nilfs->ns_bdev, rb->blocknr, nilfs->ns_blocksize);
478 if (unlikely(!bh_org)) 495 if (unlikely(!bh_org))
479 return -EIO; 496 return -EIO;
480 497
@@ -485,13 +502,14 @@ static int nilfs_recovery_copy_block(struct nilfs_sb_info *sbi,
485 return 0; 502 return 0;
486} 503}
487 504
488static int recover_dsync_blocks(struct nilfs_sb_info *sbi, 505static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
489 struct list_head *head, 506 struct nilfs_sb_info *sbi,
490 unsigned long *nr_salvaged_blocks) 507 struct list_head *head,
508 unsigned long *nr_salvaged_blocks)
491{ 509{
492 struct inode *inode; 510 struct inode *inode;
493 struct nilfs_recovery_block *rb, *n; 511 struct nilfs_recovery_block *rb, *n;
494 unsigned blocksize = sbi->s_super->s_blocksize; 512 unsigned blocksize = nilfs->ns_blocksize;
495 struct page *page; 513 struct page *page;
496 loff_t pos; 514 loff_t pos;
497 int err = 0, err2 = 0; 515 int err = 0, err2 = 0;
@@ -505,13 +523,16 @@ static int recover_dsync_blocks(struct nilfs_sb_info *sbi,
505 } 523 }
506 524
507 pos = rb->blkoff << inode->i_blkbits; 525 pos = rb->blkoff << inode->i_blkbits;
508 page = NULL; 526 err = block_write_begin(inode->i_mapping, pos, blocksize,
509 err = block_write_begin(NULL, inode->i_mapping, pos, blocksize, 527 0, &page, nilfs_get_block);
510 0, &page, NULL, nilfs_get_block); 528 if (unlikely(err)) {
511 if (unlikely(err)) 529 loff_t isize = inode->i_size;
530 if (pos + blocksize > isize)
531 vmtruncate(inode, isize);
512 goto failed_inode; 532 goto failed_inode;
533 }
513 534
514 err = nilfs_recovery_copy_block(sbi, rb, page); 535 err = nilfs_recovery_copy_block(nilfs, rb, page);
515 if (unlikely(err)) 536 if (unlikely(err))
516 goto failed_page; 537 goto failed_page;
517 538
@@ -551,18 +572,20 @@ static int recover_dsync_blocks(struct nilfs_sb_info *sbi,
551/** 572/**
552 * nilfs_do_roll_forward - salvage logical segments newer than the latest 573 * nilfs_do_roll_forward - salvage logical segments newer than the latest
553 * checkpoint 574 * checkpoint
575 * @nilfs: nilfs object
554 * @sbi: nilfs_sb_info 576 * @sbi: nilfs_sb_info
555 * @nilfs: the_nilfs
556 * @ri: pointer to a nilfs_recovery_info 577 * @ri: pointer to a nilfs_recovery_info
557 */ 578 */
558static int nilfs_do_roll_forward(struct the_nilfs *nilfs, 579static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
559 struct nilfs_sb_info *sbi, 580 struct nilfs_sb_info *sbi,
560 struct nilfs_recovery_info *ri) 581 struct nilfs_recovery_info *ri)
561{ 582{
562 struct nilfs_segsum_info ssi; 583 struct buffer_head *bh_sum = NULL;
584 struct nilfs_segment_summary *sum;
563 sector_t pseg_start; 585 sector_t pseg_start;
564 sector_t seg_start, seg_end; /* Starting/ending DBN of full segment */ 586 sector_t seg_start, seg_end; /* Starting/ending DBN of full segment */
565 unsigned long nsalvaged_blocks = 0; 587 unsigned long nsalvaged_blocks = 0;
588 unsigned int flags;
566 u64 seg_seq; 589 u64 seg_seq;
567 __u64 segnum, nextnum = 0; 590 __u64 segnum, nextnum = 0;
568 int empty_seg = 0; 591 int empty_seg = 0;
@@ -581,8 +604,14 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
581 nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end); 604 nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
582 605
583 while (segnum != ri->ri_segnum || pseg_start <= ri->ri_pseg_start) { 606 while (segnum != ri->ri_segnum || pseg_start <= ri->ri_pseg_start) {
607 brelse(bh_sum);
608 bh_sum = nilfs_read_log_header(nilfs, pseg_start, &sum);
609 if (!bh_sum) {
610 err = -EIO;
611 goto failed;
612 }
584 613
585 ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi); 614 ret = nilfs_validate_log(nilfs, seg_seq, bh_sum, sum);
586 if (ret) { 615 if (ret) {
587 if (ret == NILFS_SEG_FAIL_IO) { 616 if (ret == NILFS_SEG_FAIL_IO) {
588 err = -EIO; 617 err = -EIO;
@@ -590,33 +619,38 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
590 } 619 }
591 goto strayed; 620 goto strayed;
592 } 621 }
593 if (unlikely(NILFS_SEG_HAS_SR(&ssi))) 622
623 flags = le16_to_cpu(sum->ss_flags);
624 if (flags & NILFS_SS_SR)
594 goto confused; 625 goto confused;
595 626
596 /* Found a valid partial segment; do recovery actions */ 627 /* Found a valid partial segment; do recovery actions */
597 nextnum = nilfs_get_segnum_of_block(nilfs, ssi.next); 628 nextnum = nilfs_get_segnum_of_block(nilfs,
629 le64_to_cpu(sum->ss_next));
598 empty_seg = 0; 630 empty_seg = 0;
599 nilfs->ns_ctime = ssi.ctime; 631 nilfs->ns_ctime = le64_to_cpu(sum->ss_create);
600 if (!(ssi.flags & NILFS_SS_GC)) 632 if (!(flags & NILFS_SS_GC))
601 nilfs->ns_nongc_ctime = ssi.ctime; 633 nilfs->ns_nongc_ctime = nilfs->ns_ctime;
602 634
603 switch (state) { 635 switch (state) {
604 case RF_INIT_ST: 636 case RF_INIT_ST:
605 if (!NILFS_SEG_LOGBGN(&ssi) || !NILFS_SEG_DSYNC(&ssi)) 637 if (!(flags & NILFS_SS_LOGBGN) ||
638 !(flags & NILFS_SS_SYNDT))
606 goto try_next_pseg; 639 goto try_next_pseg;
607 state = RF_DSYNC_ST; 640 state = RF_DSYNC_ST;
608 /* Fall through */ 641 /* Fall through */
609 case RF_DSYNC_ST: 642 case RF_DSYNC_ST:
610 if (!NILFS_SEG_DSYNC(&ssi)) 643 if (!(flags & NILFS_SS_SYNDT))
611 goto confused; 644 goto confused;
612 645
613 err = collect_blocks_from_segsum( 646 err = nilfs_scan_dsync_log(nilfs, pseg_start, sum,
614 sbi, pseg_start, &ssi, &dsync_blocks); 647 &dsync_blocks);
615 if (unlikely(err)) 648 if (unlikely(err))
616 goto failed; 649 goto failed;
617 if (NILFS_SEG_LOGEND(&ssi)) { 650 if (flags & NILFS_SS_LOGEND) {
618 err = recover_dsync_blocks( 651 err = nilfs_recover_dsync_blocks(
619 sbi, &dsync_blocks, &nsalvaged_blocks); 652 nilfs, sbi, &dsync_blocks,
653 &nsalvaged_blocks);
620 if (unlikely(err)) 654 if (unlikely(err))
621 goto failed; 655 goto failed;
622 state = RF_INIT_ST; 656 state = RF_INIT_ST;
@@ -627,7 +661,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
627 try_next_pseg: 661 try_next_pseg:
628 if (pseg_start == ri->ri_lsegs_end) 662 if (pseg_start == ri->ri_lsegs_end)
629 break; 663 break;
630 pseg_start += ssi.nblocks; 664 pseg_start += le32_to_cpu(sum->ss_nblocks);
631 if (pseg_start < seg_end) 665 if (pseg_start < seg_end)
632 continue; 666 continue;
633 goto feed_segment; 667 goto feed_segment;
@@ -652,8 +686,9 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
652 ri->ri_need_recovery = NILFS_RECOVERY_ROLLFORWARD_DONE; 686 ri->ri_need_recovery = NILFS_RECOVERY_ROLLFORWARD_DONE;
653 } 687 }
654 out: 688 out:
689 brelse(bh_sum);
655 dispose_recovery_list(&dsync_blocks); 690 dispose_recovery_list(&dsync_blocks);
656 nilfs_detach_writer(sbi->s_nilfs, sbi); 691 nilfs_detach_writer(nilfs, sbi);
657 return err; 692 return err;
658 693
659 confused: 694 confused:
@@ -667,7 +702,6 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
667} 702}
668 703
669static void nilfs_finish_roll_forward(struct the_nilfs *nilfs, 704static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
670 struct nilfs_sb_info *sbi,
671 struct nilfs_recovery_info *ri) 705 struct nilfs_recovery_info *ri)
672{ 706{
673 struct buffer_head *bh; 707 struct buffer_head *bh;
@@ -677,7 +711,7 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
677 nilfs_get_segnum_of_block(nilfs, ri->ri_super_root)) 711 nilfs_get_segnum_of_block(nilfs, ri->ri_super_root))
678 return; 712 return;
679 713
680 bh = sb_getblk(sbi->s_super, ri->ri_lsegs_start); 714 bh = __getblk(nilfs->ns_bdev, ri->ri_lsegs_start, nilfs->ns_blocksize);
681 BUG_ON(!bh); 715 BUG_ON(!bh);
682 memset(bh->b_data, 0, bh->b_size); 716 memset(bh->b_data, 0, bh->b_size);
683 set_buffer_dirty(bh); 717 set_buffer_dirty(bh);
@@ -690,9 +724,8 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
690} 724}
691 725
692/** 726/**
693 * nilfs_recover_logical_segments - salvage logical segments written after 727 * nilfs_salvage_orphan_logs - salvage logs written after the latest checkpoint
694 * the latest super root 728 * @nilfs: nilfs object
695 * @nilfs: the_nilfs
696 * @sbi: nilfs_sb_info 729 * @sbi: nilfs_sb_info
697 * @ri: pointer to a nilfs_recovery_info struct to store search results. 730 * @ri: pointer to a nilfs_recovery_info struct to store search results.
698 * 731 *
@@ -709,9 +742,9 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
709 * 742 *
710 * %-ENOMEM - Insufficient memory available. 743 * %-ENOMEM - Insufficient memory available.
711 */ 744 */
712int nilfs_recover_logical_segments(struct the_nilfs *nilfs, 745int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs,
713 struct nilfs_sb_info *sbi, 746 struct nilfs_sb_info *sbi,
714 struct nilfs_recovery_info *ri) 747 struct nilfs_recovery_info *ri)
715{ 748{
716 int err; 749 int err;
717 750
@@ -751,7 +784,7 @@ int nilfs_recover_logical_segments(struct the_nilfs *nilfs,
751 goto failed; 784 goto failed;
752 } 785 }
753 786
754 nilfs_finish_roll_forward(nilfs, sbi, ri); 787 nilfs_finish_roll_forward(nilfs, ri);
755 } 788 }
756 789
757 failed: 790 failed:
@@ -762,7 +795,6 @@ int nilfs_recover_logical_segments(struct the_nilfs *nilfs,
762/** 795/**
763 * nilfs_search_super_root - search the latest valid super root 796 * nilfs_search_super_root - search the latest valid super root
764 * @nilfs: the_nilfs 797 * @nilfs: the_nilfs
765 * @sbi: nilfs_sb_info
766 * @ri: pointer to a nilfs_recovery_info struct to store search results. 798 * @ri: pointer to a nilfs_recovery_info struct to store search results.
767 * 799 *
768 * nilfs_search_super_root() looks for the latest super-root from a partial 800 * nilfs_search_super_root() looks for the latest super-root from a partial
@@ -775,14 +807,19 @@ int nilfs_recover_logical_segments(struct the_nilfs *nilfs,
775 * %-EINVAL - No valid segment found 807 * %-EINVAL - No valid segment found
776 * 808 *
777 * %-EIO - I/O error 809 * %-EIO - I/O error
810 *
811 * %-ENOMEM - Insufficient memory available.
778 */ 812 */
779int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, 813int nilfs_search_super_root(struct the_nilfs *nilfs,
780 struct nilfs_recovery_info *ri) 814 struct nilfs_recovery_info *ri)
781{ 815{
782 struct nilfs_segsum_info ssi; 816 struct buffer_head *bh_sum = NULL;
817 struct nilfs_segment_summary *sum;
783 sector_t pseg_start, pseg_end, sr_pseg_start = 0; 818 sector_t pseg_start, pseg_end, sr_pseg_start = 0;
784 sector_t seg_start, seg_end; /* range of full segment (block number) */ 819 sector_t seg_start, seg_end; /* range of full segment (block number) */
785 sector_t b, end; 820 sector_t b, end;
821 unsigned long nblocks;
822 unsigned int flags;
786 u64 seg_seq; 823 u64 seg_seq;
787 __u64 segnum, nextnum = 0; 824 __u64 segnum, nextnum = 0;
788 __u64 cno; 825 __u64 cno;
@@ -801,17 +838,24 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
801 /* Read ahead segment */ 838 /* Read ahead segment */
802 b = seg_start; 839 b = seg_start;
803 while (b <= seg_end) 840 while (b <= seg_end)
804 sb_breadahead(sbi->s_super, b++); 841 __breadahead(nilfs->ns_bdev, b++, nilfs->ns_blocksize);
805 842
806 for (;;) { 843 for (;;) {
807 /* Load segment summary */ 844 brelse(bh_sum);
808 ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi); 845 ret = NILFS_SEG_FAIL_IO;
846 bh_sum = nilfs_read_log_header(nilfs, pseg_start, &sum);
847 if (!bh_sum)
848 goto failed;
849
850 ret = nilfs_validate_log(nilfs, seg_seq, bh_sum, sum);
809 if (ret) { 851 if (ret) {
810 if (ret == NILFS_SEG_FAIL_IO) 852 if (ret == NILFS_SEG_FAIL_IO)
811 goto failed; 853 goto failed;
812 goto strayed; 854 goto strayed;
813 } 855 }
814 pseg_end = pseg_start + ssi.nblocks - 1; 856
857 nblocks = le32_to_cpu(sum->ss_nblocks);
858 pseg_end = pseg_start + nblocks - 1;
815 if (unlikely(pseg_end > seg_end)) { 859 if (unlikely(pseg_end > seg_end)) {
816 ret = NILFS_SEG_FAIL_CONSISTENCY; 860 ret = NILFS_SEG_FAIL_CONSISTENCY;
817 goto strayed; 861 goto strayed;
@@ -821,11 +865,13 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
821 ri->ri_pseg_start = pseg_start; 865 ri->ri_pseg_start = pseg_start;
822 ri->ri_seq = seg_seq; 866 ri->ri_seq = seg_seq;
823 ri->ri_segnum = segnum; 867 ri->ri_segnum = segnum;
824 nextnum = nilfs_get_segnum_of_block(nilfs, ssi.next); 868 nextnum = nilfs_get_segnum_of_block(nilfs,
869 le64_to_cpu(sum->ss_next));
825 ri->ri_nextnum = nextnum; 870 ri->ri_nextnum = nextnum;
826 empty_seg = 0; 871 empty_seg = 0;
827 872
828 if (!NILFS_SEG_HAS_SR(&ssi) && !scan_newer) { 873 flags = le16_to_cpu(sum->ss_flags);
874 if (!(flags & NILFS_SS_SR) && !scan_newer) {
829 /* This will never happen because a superblock 875 /* This will never happen because a superblock
830 (last_segment) always points to a pseg 876 (last_segment) always points to a pseg
831 having a super root. */ 877 having a super root. */
@@ -836,14 +882,15 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
836 if (pseg_start == seg_start) { 882 if (pseg_start == seg_start) {
837 nilfs_get_segment_range(nilfs, nextnum, &b, &end); 883 nilfs_get_segment_range(nilfs, nextnum, &b, &end);
838 while (b <= end) 884 while (b <= end)
839 sb_breadahead(sbi->s_super, b++); 885 __breadahead(nilfs->ns_bdev, b++,
886 nilfs->ns_blocksize);
840 } 887 }
841 if (!NILFS_SEG_HAS_SR(&ssi)) { 888 if (!(flags & NILFS_SS_SR)) {
842 if (!ri->ri_lsegs_start && NILFS_SEG_LOGBGN(&ssi)) { 889 if (!ri->ri_lsegs_start && (flags & NILFS_SS_LOGBGN)) {
843 ri->ri_lsegs_start = pseg_start; 890 ri->ri_lsegs_start = pseg_start;
844 ri->ri_lsegs_start_seq = seg_seq; 891 ri->ri_lsegs_start_seq = seg_seq;
845 } 892 }
846 if (NILFS_SEG_LOGEND(&ssi)) 893 if (flags & NILFS_SS_LOGEND)
847 ri->ri_lsegs_end = pseg_start; 894 ri->ri_lsegs_end = pseg_start;
848 goto try_next_pseg; 895 goto try_next_pseg;
849 } 896 }
@@ -854,12 +901,12 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
854 ri->ri_lsegs_start = ri->ri_lsegs_end = 0; 901 ri->ri_lsegs_start = ri->ri_lsegs_end = 0;
855 902
856 nilfs_dispose_segment_list(&segments); 903 nilfs_dispose_segment_list(&segments);
857 nilfs->ns_pseg_offset = (sr_pseg_start = pseg_start) 904 sr_pseg_start = pseg_start;
858 + ssi.nblocks - seg_start; 905 nilfs->ns_pseg_offset = pseg_start + nblocks - seg_start;
859 nilfs->ns_seg_seq = seg_seq; 906 nilfs->ns_seg_seq = seg_seq;
860 nilfs->ns_segnum = segnum; 907 nilfs->ns_segnum = segnum;
861 nilfs->ns_cno = cno; /* nilfs->ns_cno = ri->ri_cno + 1 */ 908 nilfs->ns_cno = cno; /* nilfs->ns_cno = ri->ri_cno + 1 */
862 nilfs->ns_ctime = ssi.ctime; 909 nilfs->ns_ctime = le64_to_cpu(sum->ss_create);
863 nilfs->ns_nextnum = nextnum; 910 nilfs->ns_nextnum = nextnum;
864 911
865 if (scan_newer) 912 if (scan_newer)
@@ -870,15 +917,9 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
870 scan_newer = 1; 917 scan_newer = 1;
871 } 918 }
872 919
873 /* reset region for roll-forward */
874 pseg_start += ssi.nblocks;
875 if (pseg_start < seg_end)
876 continue;
877 goto feed_segment;
878
879 try_next_pseg: 920 try_next_pseg:
880 /* Standing on a course, or met an inconsistent state */ 921 /* Standing on a course, or met an inconsistent state */
881 pseg_start += ssi.nblocks; 922 pseg_start += nblocks;
882 if (pseg_start < seg_end) 923 if (pseg_start < seg_end)
883 continue; 924 continue;
884 goto feed_segment; 925 goto feed_segment;
@@ -909,6 +950,7 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
909 950
910 super_root_found: 951 super_root_found:
911 /* Updating pointers relating to the latest checkpoint */ 952 /* Updating pointers relating to the latest checkpoint */
953 brelse(bh_sum);
912 list_splice_tail(&segments, &ri->ri_used_segments); 954 list_splice_tail(&segments, &ri->ri_used_segments);
913 nilfs->ns_last_pseg = sr_pseg_start; 955 nilfs->ns_last_pseg = sr_pseg_start;
914 nilfs->ns_last_seq = nilfs->ns_seg_seq; 956 nilfs->ns_last_seq = nilfs->ns_seg_seq;
@@ -916,6 +958,7 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
916 return 0; 958 return 0;
917 959
918 failed: 960 failed:
961 brelse(bh_sum);
919 nilfs_dispose_segment_list(&segments); 962 nilfs_dispose_segment_list(&segments);
920 return (ret < 0) ? ret : nilfs_warn_segment_error(ret); 963 return (ret < 0) ? ret : nilfs_warn_segment_error(ret);
921} 964}
diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h
index 85fbb66455e2..b04f08cc2397 100644
--- a/fs/nilfs2/segbuf.h
+++ b/fs/nilfs2/segbuf.h
@@ -54,17 +54,6 @@ struct nilfs_segsum_info {
54 sector_t next; 54 sector_t next;
55}; 55};
56 56
57/* macro for the flags */
58#define NILFS_SEG_HAS_SR(sum) ((sum)->flags & NILFS_SS_SR)
59#define NILFS_SEG_LOGBGN(sum) ((sum)->flags & NILFS_SS_LOGBGN)
60#define NILFS_SEG_LOGEND(sum) ((sum)->flags & NILFS_SS_LOGEND)
61#define NILFS_SEG_DSYNC(sum) ((sum)->flags & NILFS_SS_SYNDT)
62#define NILFS_SEG_SIMPLEX(sum) \
63 (((sum)->flags & (NILFS_SS_LOGBGN | NILFS_SS_LOGEND)) == \
64 (NILFS_SS_LOGBGN | NILFS_SS_LOGEND))
65
66#define NILFS_SEG_EMPTY(sum) ((sum)->nblocks == (sum)->nsumblk)
67
68/** 57/**
69 * struct nilfs_segment_buffer - Segment buffer 58 * struct nilfs_segment_buffer - Segment buffer
70 * @sb_super: back pointer to a superblock struct 59 * @sb_super: back pointer to a superblock struct
@@ -141,6 +130,19 @@ int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *,
141 struct buffer_head **); 130 struct buffer_head **);
142void nilfs_segbuf_fill_in_segsum(struct nilfs_segment_buffer *); 131void nilfs_segbuf_fill_in_segsum(struct nilfs_segment_buffer *);
143 132
133static inline int nilfs_segbuf_simplex(struct nilfs_segment_buffer *segbuf)
134{
135 unsigned int flags = segbuf->sb_sum.flags;
136
137 return (flags & (NILFS_SS_LOGBGN | NILFS_SS_LOGEND)) ==
138 (NILFS_SS_LOGBGN | NILFS_SS_LOGEND);
139}
140
141static inline int nilfs_segbuf_empty(struct nilfs_segment_buffer *segbuf)
142{
143 return segbuf->sb_sum.nblocks == segbuf->sb_sum.nsumblk;
144}
145
144static inline void 146static inline void
145nilfs_segbuf_add_segsum_buffer(struct nilfs_segment_buffer *segbuf, 147nilfs_segbuf_add_segsum_buffer(struct nilfs_segment_buffer *segbuf,
146 struct buffer_head *bh) 148 struct buffer_head *bh)
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index c9201649cc49..9fd051a33c4f 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1914,12 +1914,12 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
1914 } 1914 }
1915 } 1915 }
1916 1916
1917 if (!NILFS_SEG_SIMPLEX(&segbuf->sb_sum)) { 1917 if (!nilfs_segbuf_simplex(segbuf)) {
1918 if (NILFS_SEG_LOGBGN(&segbuf->sb_sum)) { 1918 if (segbuf->sb_sum.flags & NILFS_SS_LOGBGN) {
1919 set_bit(NILFS_SC_UNCLOSED, &sci->sc_flags); 1919 set_bit(NILFS_SC_UNCLOSED, &sci->sc_flags);
1920 sci->sc_lseg_stime = jiffies; 1920 sci->sc_lseg_stime = jiffies;
1921 } 1921 }
1922 if (NILFS_SEG_LOGEND(&segbuf->sb_sum)) 1922 if (segbuf->sb_sum.flags & NILFS_SS_LOGEND)
1923 clear_bit(NILFS_SC_UNCLOSED, &sci->sc_flags); 1923 clear_bit(NILFS_SC_UNCLOSED, &sci->sc_flags);
1924 } 1924 }
1925 } 1925 }
@@ -1951,7 +1951,6 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
1951 if (update_sr) { 1951 if (update_sr) {
1952 nilfs_set_last_segment(nilfs, segbuf->sb_pseg_start, 1952 nilfs_set_last_segment(nilfs, segbuf->sb_pseg_start,
1953 segbuf->sb_sum.seg_seq, nilfs->ns_cno++); 1953 segbuf->sb_sum.seg_seq, nilfs->ns_cno++);
1954 set_nilfs_sb_dirty(nilfs);
1955 1954
1956 clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags); 1955 clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
1957 clear_bit(NILFS_SC_DIRTY, &sci->sc_flags); 1956 clear_bit(NILFS_SC_DIRTY, &sci->sc_flags);
@@ -2082,7 +2081,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2082 2081
2083 /* Avoid empty segment */ 2082 /* Avoid empty segment */
2084 if (sci->sc_stage.scnt == NILFS_ST_DONE && 2083 if (sci->sc_stage.scnt == NILFS_ST_DONE &&
2085 NILFS_SEG_EMPTY(&sci->sc_curseg->sb_sum)) { 2084 nilfs_segbuf_empty(sci->sc_curseg)) {
2086 nilfs_segctor_abort_construction(sci, nilfs, 1); 2085 nilfs_segctor_abort_construction(sci, nilfs, 1);
2087 goto out; 2086 goto out;
2088 } 2087 }
@@ -2408,6 +2407,7 @@ static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode)
2408{ 2407{
2409 struct nilfs_sb_info *sbi = sci->sc_sbi; 2408 struct nilfs_sb_info *sbi = sci->sc_sbi;
2410 struct the_nilfs *nilfs = sbi->s_nilfs; 2409 struct the_nilfs *nilfs = sbi->s_nilfs;
2410 struct nilfs_super_block **sbp;
2411 int err = 0; 2411 int err = 0;
2412 2412
2413 nilfs_segctor_accept(sci); 2413 nilfs_segctor_accept(sci);
@@ -2423,8 +2423,13 @@ static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode)
2423 if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) && 2423 if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) &&
2424 nilfs_discontinued(nilfs)) { 2424 nilfs_discontinued(nilfs)) {
2425 down_write(&nilfs->ns_sem); 2425 down_write(&nilfs->ns_sem);
2426 err = nilfs_commit_super( 2426 err = -EIO;
2427 sbi, nilfs_altsb_need_update(nilfs)); 2427 sbp = nilfs_prepare_super(sbi,
2428 nilfs_sb_will_flip(nilfs));
2429 if (likely(sbp)) {
2430 nilfs_set_log_cursor(sbp[0], nilfs);
2431 err = nilfs_commit_super(sbi, NILFS_SB_COMMIT);
2432 }
2428 up_write(&nilfs->ns_sem); 2433 up_write(&nilfs->ns_sem);
2429 } 2434 }
2430 } 2435 }
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index 01e20dbb217d..17c487bd8152 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -234,13 +234,13 @@ extern int nilfs_attach_segment_constructor(struct nilfs_sb_info *);
234extern void nilfs_detach_segment_constructor(struct nilfs_sb_info *); 234extern void nilfs_detach_segment_constructor(struct nilfs_sb_info *);
235 235
236/* recovery.c */ 236/* recovery.c */
237extern int nilfs_read_super_root_block(struct super_block *, sector_t, 237extern int nilfs_read_super_root_block(struct the_nilfs *, sector_t,
238 struct buffer_head **, int); 238 struct buffer_head **, int);
239extern int nilfs_search_super_root(struct the_nilfs *, struct nilfs_sb_info *, 239extern int nilfs_search_super_root(struct the_nilfs *,
240 struct nilfs_recovery_info *); 240 struct nilfs_recovery_info *);
241extern int nilfs_recover_logical_segments(struct the_nilfs *, 241extern int nilfs_salvage_orphan_logs(struct the_nilfs *,
242 struct nilfs_sb_info *, 242 struct nilfs_sb_info *,
243 struct nilfs_recovery_info *); 243 struct nilfs_recovery_info *);
244extern void nilfs_dispose_segment_list(struct list_head *); 244extern void nilfs_dispose_segment_list(struct list_head *);
245 245
246#endif /* _NILFS_SEGMENT_H */ 246#endif /* _NILFS_SEGMENT_H */
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 414ef68931cf..1fa86b9df73b 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -55,6 +55,8 @@
55#include "nilfs.h" 55#include "nilfs.h"
56#include "mdt.h" 56#include "mdt.h"
57#include "alloc.h" 57#include "alloc.h"
58#include "btree.h"
59#include "btnode.h"
58#include "page.h" 60#include "page.h"
59#include "cpfile.h" 61#include "cpfile.h"
60#include "ifile.h" 62#include "ifile.h"
@@ -74,6 +76,25 @@ struct kmem_cache *nilfs_btree_path_cache;
74 76
75static int nilfs_remount(struct super_block *sb, int *flags, char *data); 77static int nilfs_remount(struct super_block *sb, int *flags, char *data);
76 78
79static void nilfs_set_error(struct nilfs_sb_info *sbi)
80{
81 struct the_nilfs *nilfs = sbi->s_nilfs;
82 struct nilfs_super_block **sbp;
83
84 down_write(&nilfs->ns_sem);
85 if (!(nilfs->ns_mount_state & NILFS_ERROR_FS)) {
86 nilfs->ns_mount_state |= NILFS_ERROR_FS;
87 sbp = nilfs_prepare_super(sbi, 0);
88 if (likely(sbp)) {
89 sbp[0]->s_state |= cpu_to_le16(NILFS_ERROR_FS);
90 if (sbp[1])
91 sbp[1]->s_state |= cpu_to_le16(NILFS_ERROR_FS);
92 nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL);
93 }
94 }
95 up_write(&nilfs->ns_sem);
96}
97
77/** 98/**
78 * nilfs_error() - report failure condition on a filesystem 99 * nilfs_error() - report failure condition on a filesystem
79 * 100 *
@@ -99,16 +120,7 @@ void nilfs_error(struct super_block *sb, const char *function,
99 va_end(args); 120 va_end(args);
100 121
101 if (!(sb->s_flags & MS_RDONLY)) { 122 if (!(sb->s_flags & MS_RDONLY)) {
102 struct the_nilfs *nilfs = sbi->s_nilfs; 123 nilfs_set_error(sbi);
103
104 down_write(&nilfs->ns_sem);
105 if (!(nilfs->ns_mount_state & NILFS_ERROR_FS)) {
106 nilfs->ns_mount_state |= NILFS_ERROR_FS;
107 nilfs->ns_sbp[0]->s_state |=
108 cpu_to_le16(NILFS_ERROR_FS);
109 nilfs_commit_super(sbi, 1);
110 }
111 up_write(&nilfs->ns_sem);
112 124
113 if (nilfs_test_opt(sbi, ERRORS_RO)) { 125 if (nilfs_test_opt(sbi, ERRORS_RO)) {
114 printk(KERN_CRIT "Remounting filesystem read-only\n"); 126 printk(KERN_CRIT "Remounting filesystem read-only\n");
@@ -159,24 +171,7 @@ void nilfs_destroy_inode(struct inode *inode)
159 kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); 171 kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode));
160} 172}
161 173
162static void nilfs_clear_inode(struct inode *inode) 174static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag)
163{
164 struct nilfs_inode_info *ii = NILFS_I(inode);
165
166 /*
167 * Free resources allocated in nilfs_read_inode(), here.
168 */
169 BUG_ON(!list_empty(&ii->i_dirty));
170 brelse(ii->i_bh);
171 ii->i_bh = NULL;
172
173 if (test_bit(NILFS_I_BMAP, &ii->i_state))
174 nilfs_bmap_clear(ii->i_bmap);
175
176 nilfs_btnode_cache_clear(&ii->i_btnode_cache);
177}
178
179static int nilfs_sync_super(struct nilfs_sb_info *sbi, int dupsb)
180{ 175{
181 struct the_nilfs *nilfs = sbi->s_nilfs; 176 struct the_nilfs *nilfs = sbi->s_nilfs;
182 int err; 177 int err;
@@ -202,12 +197,20 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int dupsb)
202 printk(KERN_ERR 197 printk(KERN_ERR
203 "NILFS: unable to write superblock (err=%d)\n", err); 198 "NILFS: unable to write superblock (err=%d)\n", err);
204 if (err == -EIO && nilfs->ns_sbh[1]) { 199 if (err == -EIO && nilfs->ns_sbh[1]) {
200 /*
201 * sbp[0] points to newer log than sbp[1],
202 * so copy sbp[0] to sbp[1] to take over sbp[0].
203 */
204 memcpy(nilfs->ns_sbp[1], nilfs->ns_sbp[0],
205 nilfs->ns_sbsize);
205 nilfs_fall_back_super_block(nilfs); 206 nilfs_fall_back_super_block(nilfs);
206 goto retry; 207 goto retry;
207 } 208 }
208 } else { 209 } else {
209 struct nilfs_super_block *sbp = nilfs->ns_sbp[0]; 210 struct nilfs_super_block *sbp = nilfs->ns_sbp[0];
210 211
212 nilfs->ns_sbwcount++;
213
211 /* 214 /*
212 * The latest segment becomes trailable from the position 215 * The latest segment becomes trailable from the position
213 * written in superblock. 216 * written in superblock.
@@ -216,66 +219,122 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int dupsb)
216 219
217 /* update GC protection for recent segments */ 220 /* update GC protection for recent segments */
218 if (nilfs->ns_sbh[1]) { 221 if (nilfs->ns_sbh[1]) {
219 sbp = NULL; 222 if (flag == NILFS_SB_COMMIT_ALL) {
220 if (dupsb) {
221 set_buffer_dirty(nilfs->ns_sbh[1]); 223 set_buffer_dirty(nilfs->ns_sbh[1]);
222 if (!sync_dirty_buffer(nilfs->ns_sbh[1])) 224 if (sync_dirty_buffer(nilfs->ns_sbh[1]) < 0)
223 sbp = nilfs->ns_sbp[1]; 225 goto out;
224 } 226 }
227 if (le64_to_cpu(nilfs->ns_sbp[1]->s_last_cno) <
228 le64_to_cpu(nilfs->ns_sbp[0]->s_last_cno))
229 sbp = nilfs->ns_sbp[1];
225 } 230 }
226 if (sbp) {
227 spin_lock(&nilfs->ns_last_segment_lock);
228 nilfs->ns_prot_seq = le64_to_cpu(sbp->s_last_seq);
229 spin_unlock(&nilfs->ns_last_segment_lock);
230 }
231 }
232 231
232 spin_lock(&nilfs->ns_last_segment_lock);
233 nilfs->ns_prot_seq = le64_to_cpu(sbp->s_last_seq);
234 spin_unlock(&nilfs->ns_last_segment_lock);
235 }
236 out:
233 return err; 237 return err;
234} 238}
235 239
236int nilfs_commit_super(struct nilfs_sb_info *sbi, int dupsb) 240void nilfs_set_log_cursor(struct nilfs_super_block *sbp,
241 struct the_nilfs *nilfs)
242{
243 sector_t nfreeblocks;
244
245 /* nilfs->ns_sem must be locked by the caller. */
246 nilfs_count_free_blocks(nilfs, &nfreeblocks);
247 sbp->s_free_blocks_count = cpu_to_le64(nfreeblocks);
248
249 spin_lock(&nilfs->ns_last_segment_lock);
250 sbp->s_last_seq = cpu_to_le64(nilfs->ns_last_seq);
251 sbp->s_last_pseg = cpu_to_le64(nilfs->ns_last_pseg);
252 sbp->s_last_cno = cpu_to_le64(nilfs->ns_last_cno);
253 spin_unlock(&nilfs->ns_last_segment_lock);
254}
255
256struct nilfs_super_block **nilfs_prepare_super(struct nilfs_sb_info *sbi,
257 int flip)
237{ 258{
238 struct the_nilfs *nilfs = sbi->s_nilfs; 259 struct the_nilfs *nilfs = sbi->s_nilfs;
239 struct nilfs_super_block **sbp = nilfs->ns_sbp; 260 struct nilfs_super_block **sbp = nilfs->ns_sbp;
240 sector_t nfreeblocks;
241 time_t t;
242 int err;
243 261
244 /* nilfs->sem must be locked by the caller. */ 262 /* nilfs->ns_sem must be locked by the caller. */
245 if (sbp[0]->s_magic != cpu_to_le16(NILFS_SUPER_MAGIC)) { 263 if (sbp[0]->s_magic != cpu_to_le16(NILFS_SUPER_MAGIC)) {
246 if (sbp[1] && sbp[1]->s_magic == cpu_to_le16(NILFS_SUPER_MAGIC)) 264 if (sbp[1] &&
247 nilfs_swap_super_block(nilfs); 265 sbp[1]->s_magic == cpu_to_le16(NILFS_SUPER_MAGIC)) {
248 else { 266 memcpy(sbp[0], sbp[1], nilfs->ns_sbsize);
267 } else {
249 printk(KERN_CRIT "NILFS: superblock broke on dev %s\n", 268 printk(KERN_CRIT "NILFS: superblock broke on dev %s\n",
250 sbi->s_super->s_id); 269 sbi->s_super->s_id);
251 return -EIO; 270 return NULL;
252 } 271 }
272 } else if (sbp[1] &&
273 sbp[1]->s_magic != cpu_to_le16(NILFS_SUPER_MAGIC)) {
274 memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
253 } 275 }
254 err = nilfs_count_free_blocks(nilfs, &nfreeblocks);
255 if (unlikely(err)) {
256 printk(KERN_ERR "NILFS: failed to count free blocks\n");
257 return err;
258 }
259 spin_lock(&nilfs->ns_last_segment_lock);
260 sbp[0]->s_last_seq = cpu_to_le64(nilfs->ns_last_seq);
261 sbp[0]->s_last_pseg = cpu_to_le64(nilfs->ns_last_pseg);
262 sbp[0]->s_last_cno = cpu_to_le64(nilfs->ns_last_cno);
263 spin_unlock(&nilfs->ns_last_segment_lock);
264 276
277 if (flip && sbp[1])
278 nilfs_swap_super_block(nilfs);
279
280 return sbp;
281}
282
283int nilfs_commit_super(struct nilfs_sb_info *sbi, int flag)
284{
285 struct the_nilfs *nilfs = sbi->s_nilfs;
286 struct nilfs_super_block **sbp = nilfs->ns_sbp;
287 time_t t;
288
289 /* nilfs->ns_sem must be locked by the caller. */
265 t = get_seconds(); 290 t = get_seconds();
266 nilfs->ns_sbwtime[0] = t; 291 nilfs->ns_sbwtime = t;
267 sbp[0]->s_free_blocks_count = cpu_to_le64(nfreeblocks);
268 sbp[0]->s_wtime = cpu_to_le64(t); 292 sbp[0]->s_wtime = cpu_to_le64(t);
269 sbp[0]->s_sum = 0; 293 sbp[0]->s_sum = 0;
270 sbp[0]->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed, 294 sbp[0]->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed,
271 (unsigned char *)sbp[0], 295 (unsigned char *)sbp[0],
272 nilfs->ns_sbsize)); 296 nilfs->ns_sbsize));
273 if (dupsb && sbp[1]) { 297 if (flag == NILFS_SB_COMMIT_ALL && sbp[1]) {
274 memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); 298 sbp[1]->s_wtime = sbp[0]->s_wtime;
275 nilfs->ns_sbwtime[1] = t; 299 sbp[1]->s_sum = 0;
300 sbp[1]->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed,
301 (unsigned char *)sbp[1],
302 nilfs->ns_sbsize));
276 } 303 }
277 clear_nilfs_sb_dirty(nilfs); 304 clear_nilfs_sb_dirty(nilfs);
278 return nilfs_sync_super(sbi, dupsb); 305 return nilfs_sync_super(sbi, flag);
306}
307
308/**
309 * nilfs_cleanup_super() - write filesystem state for cleanup
310 * @sbi: nilfs_sb_info to be unmounted or degraded to read-only
311 *
312 * This function restores state flags in the on-disk super block.
313 * This will set "clean" flag (i.e. NILFS_VALID_FS) unless the
314 * filesystem was not clean previously.
315 */
316int nilfs_cleanup_super(struct nilfs_sb_info *sbi)
317{
318 struct nilfs_super_block **sbp;
319 int flag = NILFS_SB_COMMIT;
320 int ret = -EIO;
321
322 sbp = nilfs_prepare_super(sbi, 0);
323 if (sbp) {
324 sbp[0]->s_state = cpu_to_le16(sbi->s_nilfs->ns_mount_state);
325 nilfs_set_log_cursor(sbp[0], sbi->s_nilfs);
326 if (sbp[1] && sbp[0]->s_last_cno == sbp[1]->s_last_cno) {
327 /*
328 * make the "clean" flag also to the opposite
329 * super block if both super blocks point to
330 * the same checkpoint.
331 */
332 sbp[1]->s_state = sbp[0]->s_state;
333 flag = NILFS_SB_COMMIT_ALL;
334 }
335 ret = nilfs_commit_super(sbi, flag);
336 }
337 return ret;
279} 338}
280 339
281static void nilfs_put_super(struct super_block *sb) 340static void nilfs_put_super(struct super_block *sb)
@@ -289,8 +348,7 @@ static void nilfs_put_super(struct super_block *sb)
289 348
290 if (!(sb->s_flags & MS_RDONLY)) { 349 if (!(sb->s_flags & MS_RDONLY)) {
291 down_write(&nilfs->ns_sem); 350 down_write(&nilfs->ns_sem);
292 nilfs->ns_sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state); 351 nilfs_cleanup_super(sbi);
293 nilfs_commit_super(sbi, 1);
294 up_write(&nilfs->ns_sem); 352 up_write(&nilfs->ns_sem);
295 } 353 }
296 down_write(&nilfs->ns_super_sem); 354 down_write(&nilfs->ns_super_sem);
@@ -311,6 +369,7 @@ static int nilfs_sync_fs(struct super_block *sb, int wait)
311{ 369{
312 struct nilfs_sb_info *sbi = NILFS_SB(sb); 370 struct nilfs_sb_info *sbi = NILFS_SB(sb);
313 struct the_nilfs *nilfs = sbi->s_nilfs; 371 struct the_nilfs *nilfs = sbi->s_nilfs;
372 struct nilfs_super_block **sbp;
314 int err = 0; 373 int err = 0;
315 374
316 /* This function is called when super block should be written back */ 375 /* This function is called when super block should be written back */
@@ -318,8 +377,13 @@ static int nilfs_sync_fs(struct super_block *sb, int wait)
318 err = nilfs_construct_segment(sb); 377 err = nilfs_construct_segment(sb);
319 378
320 down_write(&nilfs->ns_sem); 379 down_write(&nilfs->ns_sem);
321 if (nilfs_sb_dirty(nilfs)) 380 if (nilfs_sb_dirty(nilfs)) {
322 nilfs_commit_super(sbi, 1); 381 sbp = nilfs_prepare_super(sbi, nilfs_sb_will_flip(nilfs));
382 if (likely(sbp)) {
383 nilfs_set_log_cursor(sbp[0], nilfs);
384 nilfs_commit_super(sbi, NILFS_SB_COMMIT);
385 }
386 }
323 up_write(&nilfs->ns_sem); 387 up_write(&nilfs->ns_sem);
324 388
325 return err; 389 return err;
@@ -442,20 +506,20 @@ static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
442 struct nilfs_sb_info *sbi = NILFS_SB(sb); 506 struct nilfs_sb_info *sbi = NILFS_SB(sb);
443 507
444 if (!nilfs_test_opt(sbi, BARRIER)) 508 if (!nilfs_test_opt(sbi, BARRIER))
445 seq_printf(seq, ",nobarrier"); 509 seq_puts(seq, ",nobarrier");
446 if (nilfs_test_opt(sbi, SNAPSHOT)) 510 if (nilfs_test_opt(sbi, SNAPSHOT))
447 seq_printf(seq, ",cp=%llu", 511 seq_printf(seq, ",cp=%llu",
448 (unsigned long long int)sbi->s_snapshot_cno); 512 (unsigned long long int)sbi->s_snapshot_cno);
449 if (nilfs_test_opt(sbi, ERRORS_PANIC)) 513 if (nilfs_test_opt(sbi, ERRORS_PANIC))
450 seq_printf(seq, ",errors=panic"); 514 seq_puts(seq, ",errors=panic");
451 if (nilfs_test_opt(sbi, ERRORS_CONT)) 515 if (nilfs_test_opt(sbi, ERRORS_CONT))
452 seq_printf(seq, ",errors=continue"); 516 seq_puts(seq, ",errors=continue");
453 if (nilfs_test_opt(sbi, STRICT_ORDER)) 517 if (nilfs_test_opt(sbi, STRICT_ORDER))
454 seq_printf(seq, ",order=strict"); 518 seq_puts(seq, ",order=strict");
455 if (nilfs_test_opt(sbi, NORECOVERY)) 519 if (nilfs_test_opt(sbi, NORECOVERY))
456 seq_printf(seq, ",norecovery"); 520 seq_puts(seq, ",norecovery");
457 if (nilfs_test_opt(sbi, DISCARD)) 521 if (nilfs_test_opt(sbi, DISCARD))
458 seq_printf(seq, ",discard"); 522 seq_puts(seq, ",discard");
459 523
460 return 0; 524 return 0;
461} 525}
@@ -467,7 +531,7 @@ static const struct super_operations nilfs_sops = {
467 /* .write_inode = nilfs_write_inode, */ 531 /* .write_inode = nilfs_write_inode, */
468 /* .put_inode = nilfs_put_inode, */ 532 /* .put_inode = nilfs_put_inode, */
469 /* .drop_inode = nilfs_drop_inode, */ 533 /* .drop_inode = nilfs_drop_inode, */
470 .delete_inode = nilfs_delete_inode, 534 .evict_inode = nilfs_evict_inode,
471 .put_super = nilfs_put_super, 535 .put_super = nilfs_put_super,
472 /* .write_super = nilfs_write_super, */ 536 /* .write_super = nilfs_write_super, */
473 .sync_fs = nilfs_sync_fs, 537 .sync_fs = nilfs_sync_fs,
@@ -475,7 +539,6 @@ static const struct super_operations nilfs_sops = {
475 /* .unlockfs */ 539 /* .unlockfs */
476 .statfs = nilfs_statfs, 540 .statfs = nilfs_statfs,
477 .remount_fs = nilfs_remount, 541 .remount_fs = nilfs_remount,
478 .clear_inode = nilfs_clear_inode,
479 /* .umount_begin */ 542 /* .umount_begin */
480 .show_options = nilfs_show_options 543 .show_options = nilfs_show_options
481}; 544};
@@ -524,23 +587,25 @@ static const struct export_operations nilfs_export_ops = {
524 587
525enum { 588enum {
526 Opt_err_cont, Opt_err_panic, Opt_err_ro, 589 Opt_err_cont, Opt_err_panic, Opt_err_ro,
527 Opt_nobarrier, Opt_snapshot, Opt_order, Opt_norecovery, 590 Opt_barrier, Opt_nobarrier, Opt_snapshot, Opt_order, Opt_norecovery,
528 Opt_discard, Opt_err, 591 Opt_discard, Opt_nodiscard, Opt_err,
529}; 592};
530 593
531static match_table_t tokens = { 594static match_table_t tokens = {
532 {Opt_err_cont, "errors=continue"}, 595 {Opt_err_cont, "errors=continue"},
533 {Opt_err_panic, "errors=panic"}, 596 {Opt_err_panic, "errors=panic"},
534 {Opt_err_ro, "errors=remount-ro"}, 597 {Opt_err_ro, "errors=remount-ro"},
598 {Opt_barrier, "barrier"},
535 {Opt_nobarrier, "nobarrier"}, 599 {Opt_nobarrier, "nobarrier"},
536 {Opt_snapshot, "cp=%u"}, 600 {Opt_snapshot, "cp=%u"},
537 {Opt_order, "order=%s"}, 601 {Opt_order, "order=%s"},
538 {Opt_norecovery, "norecovery"}, 602 {Opt_norecovery, "norecovery"},
539 {Opt_discard, "discard"}, 603 {Opt_discard, "discard"},
604 {Opt_nodiscard, "nodiscard"},
540 {Opt_err, NULL} 605 {Opt_err, NULL}
541}; 606};
542 607
543static int parse_options(char *options, struct super_block *sb) 608static int parse_options(char *options, struct super_block *sb, int is_remount)
544{ 609{
545 struct nilfs_sb_info *sbi = NILFS_SB(sb); 610 struct nilfs_sb_info *sbi = NILFS_SB(sb);
546 char *p; 611 char *p;
@@ -557,6 +622,9 @@ static int parse_options(char *options, struct super_block *sb)
557 622
558 token = match_token(p, tokens, args); 623 token = match_token(p, tokens, args);
559 switch (token) { 624 switch (token) {
625 case Opt_barrier:
626 nilfs_set_opt(sbi, BARRIER);
627 break;
560 case Opt_nobarrier: 628 case Opt_nobarrier:
561 nilfs_clear_opt(sbi, BARRIER); 629 nilfs_clear_opt(sbi, BARRIER);
562 break; 630 break;
@@ -582,8 +650,26 @@ static int parse_options(char *options, struct super_block *sb)
582 case Opt_snapshot: 650 case Opt_snapshot:
583 if (match_int(&args[0], &option) || option <= 0) 651 if (match_int(&args[0], &option) || option <= 0)
584 return 0; 652 return 0;
585 if (!(sb->s_flags & MS_RDONLY)) 653 if (is_remount) {
654 if (!nilfs_test_opt(sbi, SNAPSHOT)) {
655 printk(KERN_ERR
656 "NILFS: cannot change regular "
657 "mount to snapshot.\n");
658 return 0;
659 } else if (option != sbi->s_snapshot_cno) {
660 printk(KERN_ERR
661 "NILFS: cannot remount to a "
662 "different snapshot.\n");
663 return 0;
664 }
665 break;
666 }
667 if (!(sb->s_flags & MS_RDONLY)) {
668 printk(KERN_ERR "NILFS: cannot mount snapshot "
669 "read/write. A read-only option is "
670 "required.\n");
586 return 0; 671 return 0;
672 }
587 sbi->s_snapshot_cno = option; 673 sbi->s_snapshot_cno = option;
588 nilfs_set_opt(sbi, SNAPSHOT); 674 nilfs_set_opt(sbi, SNAPSHOT);
589 break; 675 break;
@@ -593,6 +679,9 @@ static int parse_options(char *options, struct super_block *sb)
593 case Opt_discard: 679 case Opt_discard:
594 nilfs_set_opt(sbi, DISCARD); 680 nilfs_set_opt(sbi, DISCARD);
595 break; 681 break;
682 case Opt_nodiscard:
683 nilfs_clear_opt(sbi, DISCARD);
684 break;
596 default: 685 default:
597 printk(KERN_ERR 686 printk(KERN_ERR
598 "NILFS: Unrecognized mount option \"%s\"\n", p); 687 "NILFS: Unrecognized mount option \"%s\"\n", p);
@@ -613,11 +702,18 @@ nilfs_set_default_options(struct nilfs_sb_info *sbi,
613static int nilfs_setup_super(struct nilfs_sb_info *sbi) 702static int nilfs_setup_super(struct nilfs_sb_info *sbi)
614{ 703{
615 struct the_nilfs *nilfs = sbi->s_nilfs; 704 struct the_nilfs *nilfs = sbi->s_nilfs;
616 struct nilfs_super_block *sbp = nilfs->ns_sbp[0]; 705 struct nilfs_super_block **sbp;
617 int max_mnt_count = le16_to_cpu(sbp->s_max_mnt_count); 706 int max_mnt_count;
618 int mnt_count = le16_to_cpu(sbp->s_mnt_count); 707 int mnt_count;
708
709 /* nilfs->ns_sem must be locked by the caller. */
710 sbp = nilfs_prepare_super(sbi, 0);
711 if (!sbp)
712 return -EIO;
713
714 max_mnt_count = le16_to_cpu(sbp[0]->s_max_mnt_count);
715 mnt_count = le16_to_cpu(sbp[0]->s_mnt_count);
619 716
620 /* nilfs->sem must be locked by the caller. */
621 if (nilfs->ns_mount_state & NILFS_ERROR_FS) { 717 if (nilfs->ns_mount_state & NILFS_ERROR_FS) {
622 printk(KERN_WARNING 718 printk(KERN_WARNING
623 "NILFS warning: mounting fs with errors\n"); 719 "NILFS warning: mounting fs with errors\n");
@@ -628,12 +724,15 @@ static int nilfs_setup_super(struct nilfs_sb_info *sbi)
628#endif 724#endif
629 } 725 }
630 if (!max_mnt_count) 726 if (!max_mnt_count)
631 sbp->s_max_mnt_count = cpu_to_le16(NILFS_DFL_MAX_MNT_COUNT); 727 sbp[0]->s_max_mnt_count = cpu_to_le16(NILFS_DFL_MAX_MNT_COUNT);
632 728
633 sbp->s_mnt_count = cpu_to_le16(mnt_count + 1); 729 sbp[0]->s_mnt_count = cpu_to_le16(mnt_count + 1);
634 sbp->s_state = cpu_to_le16(le16_to_cpu(sbp->s_state) & ~NILFS_VALID_FS); 730 sbp[0]->s_state =
635 sbp->s_mtime = cpu_to_le64(get_seconds()); 731 cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & ~NILFS_VALID_FS);
636 return nilfs_commit_super(sbi, 1); 732 sbp[0]->s_mtime = cpu_to_le64(get_seconds());
733 /* synchronize sbp[1] with sbp[0] */
734 memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
735 return nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL);
637} 736}
638 737
639struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb, 738struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb,
@@ -670,7 +769,31 @@ int nilfs_store_magic_and_option(struct super_block *sb,
670 sbi->s_interval = le32_to_cpu(sbp->s_c_interval); 769 sbi->s_interval = le32_to_cpu(sbp->s_c_interval);
671 sbi->s_watermark = le32_to_cpu(sbp->s_c_block_max); 770 sbi->s_watermark = le32_to_cpu(sbp->s_c_block_max);
672 771
673 return !parse_options(data, sb) ? -EINVAL : 0 ; 772 return !parse_options(data, sb, 0) ? -EINVAL : 0 ;
773}
774
775int nilfs_check_feature_compatibility(struct super_block *sb,
776 struct nilfs_super_block *sbp)
777{
778 __u64 features;
779
780 features = le64_to_cpu(sbp->s_feature_incompat) &
781 ~NILFS_FEATURE_INCOMPAT_SUPP;
782 if (features) {
783 printk(KERN_ERR "NILFS: couldn't mount because of unsupported "
784 "optional features (%llx)\n",
785 (unsigned long long)features);
786 return -EINVAL;
787 }
788 features = le64_to_cpu(sbp->s_feature_compat_ro) &
789 ~NILFS_FEATURE_COMPAT_RO_SUPP;
790 if (!(sb->s_flags & MS_RDONLY) && features) {
791 printk(KERN_ERR "NILFS: couldn't mount RDWR because of "
792 "unsupported optional features (%llx)\n",
793 (unsigned long long)features);
794 return -EINVAL;
795 }
796 return 0;
674} 797}
675 798
676/** 799/**
@@ -819,7 +942,6 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
819static int nilfs_remount(struct super_block *sb, int *flags, char *data) 942static int nilfs_remount(struct super_block *sb, int *flags, char *data)
820{ 943{
821 struct nilfs_sb_info *sbi = NILFS_SB(sb); 944 struct nilfs_sb_info *sbi = NILFS_SB(sb);
822 struct nilfs_super_block *sbp;
823 struct the_nilfs *nilfs = sbi->s_nilfs; 945 struct the_nilfs *nilfs = sbi->s_nilfs;
824 unsigned long old_sb_flags; 946 unsigned long old_sb_flags;
825 struct nilfs_mount_options old_opts; 947 struct nilfs_mount_options old_opts;
@@ -833,32 +955,17 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
833 old_opts.snapshot_cno = sbi->s_snapshot_cno; 955 old_opts.snapshot_cno = sbi->s_snapshot_cno;
834 was_snapshot = nilfs_test_opt(sbi, SNAPSHOT); 956 was_snapshot = nilfs_test_opt(sbi, SNAPSHOT);
835 957
836 if (!parse_options(data, sb)) { 958 if (!parse_options(data, sb, 1)) {
837 err = -EINVAL; 959 err = -EINVAL;
838 goto restore_opts; 960 goto restore_opts;
839 } 961 }
840 sb->s_flags = (sb->s_flags & ~MS_POSIXACL); 962 sb->s_flags = (sb->s_flags & ~MS_POSIXACL);
841 963
842 err = -EINVAL; 964 err = -EINVAL;
843 if (was_snapshot) { 965 if (was_snapshot && !(*flags & MS_RDONLY)) {
844 if (!(*flags & MS_RDONLY)) { 966 printk(KERN_ERR "NILFS (device %s): cannot remount snapshot "
845 printk(KERN_ERR "NILFS (device %s): cannot remount " 967 "read/write.\n", sb->s_id);
846 "snapshot read/write.\n", 968 goto restore_opts;
847 sb->s_id);
848 goto restore_opts;
849 } else if (sbi->s_snapshot_cno != old_opts.snapshot_cno) {
850 printk(KERN_ERR "NILFS (device %s): cannot "
851 "remount to a different snapshot.\n",
852 sb->s_id);
853 goto restore_opts;
854 }
855 } else {
856 if (nilfs_test_opt(sbi, SNAPSHOT)) {
857 printk(KERN_ERR "NILFS (device %s): cannot change "
858 "a regular mount to a snapshot.\n",
859 sb->s_id);
860 goto restore_opts;
861 }
862 } 969 }
863 970
864 if (!nilfs_valid_fs(nilfs)) { 971 if (!nilfs_valid_fs(nilfs)) {
@@ -880,19 +987,29 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
880 * the RDONLY flag and then mark the partition as valid again. 987 * the RDONLY flag and then mark the partition as valid again.
881 */ 988 */
882 down_write(&nilfs->ns_sem); 989 down_write(&nilfs->ns_sem);
883 sbp = nilfs->ns_sbp[0]; 990 nilfs_cleanup_super(sbi);
884 if (!(sbp->s_state & le16_to_cpu(NILFS_VALID_FS)) &&
885 (nilfs->ns_mount_state & NILFS_VALID_FS))
886 sbp->s_state = cpu_to_le16(nilfs->ns_mount_state);
887 sbp->s_mtime = cpu_to_le64(get_seconds());
888 nilfs_commit_super(sbi, 1);
889 up_write(&nilfs->ns_sem); 991 up_write(&nilfs->ns_sem);
890 } else { 992 } else {
993 __u64 features;
994
891 /* 995 /*
892 * Mounting a RDONLY partition read-write, so reread and 996 * Mounting a RDONLY partition read-write, so reread and
893 * store the current valid flag. (It may have been changed 997 * store the current valid flag. (It may have been changed
894 * by fsck since we originally mounted the partition.) 998 * by fsck since we originally mounted the partition.)
895 */ 999 */
1000 down_read(&nilfs->ns_sem);
1001 features = le64_to_cpu(nilfs->ns_sbp[0]->s_feature_compat_ro) &
1002 ~NILFS_FEATURE_COMPAT_RO_SUPP;
1003 up_read(&nilfs->ns_sem);
1004 if (features) {
1005 printk(KERN_WARNING "NILFS (device %s): couldn't "
1006 "remount RDWR because of unsupported optional "
1007 "features (%llx)\n",
1008 sb->s_id, (unsigned long long)features);
1009 err = -EROFS;
1010 goto restore_opts;
1011 }
1012
896 sb->s_flags &= ~MS_RDONLY; 1013 sb->s_flags &= ~MS_RDONLY;
897 1014
898 err = nilfs_attach_segment_constructor(sbi); 1015 err = nilfs_attach_segment_constructor(sbi);
@@ -1119,7 +1236,7 @@ static void nilfs_inode_init_once(void *obj)
1119 init_rwsem(&ii->xattr_sem); 1236 init_rwsem(&ii->xattr_sem);
1120#endif 1237#endif
1121 nilfs_btnode_cache_init_once(&ii->i_btnode_cache); 1238 nilfs_btnode_cache_init_once(&ii->i_btnode_cache);
1122 ii->i_bmap = (struct nilfs_bmap *)&ii->i_bmap_union; 1239 ii->i_bmap = &ii->i_bmap_data;
1123 inode_init_once(&ii->vfs_inode); 1240 inode_init_once(&ii->vfs_inode);
1124} 1241}
1125 1242
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 8c1097327abc..37de1f062d81 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -38,6 +38,8 @@
38static LIST_HEAD(nilfs_objects); 38static LIST_HEAD(nilfs_objects);
39static DEFINE_SPINLOCK(nilfs_lock); 39static DEFINE_SPINLOCK(nilfs_lock);
40 40
41static int nilfs_valid_sb(struct nilfs_super_block *sbp);
42
41void nilfs_set_last_segment(struct the_nilfs *nilfs, 43void nilfs_set_last_segment(struct the_nilfs *nilfs,
42 sector_t start_blocknr, u64 seq, __u64 cno) 44 sector_t start_blocknr, u64 seq, __u64 cno)
43{ 45{
@@ -45,6 +47,16 @@ void nilfs_set_last_segment(struct the_nilfs *nilfs,
45 nilfs->ns_last_pseg = start_blocknr; 47 nilfs->ns_last_pseg = start_blocknr;
46 nilfs->ns_last_seq = seq; 48 nilfs->ns_last_seq = seq;
47 nilfs->ns_last_cno = cno; 49 nilfs->ns_last_cno = cno;
50
51 if (!nilfs_sb_dirty(nilfs)) {
52 if (nilfs->ns_prev_seq == nilfs->ns_last_seq)
53 goto stay_cursor;
54
55 set_nilfs_sb_dirty(nilfs);
56 }
57 nilfs->ns_prev_seq = nilfs->ns_last_seq;
58
59 stay_cursor:
48 spin_unlock(&nilfs->ns_last_segment_lock); 60 spin_unlock(&nilfs->ns_last_segment_lock);
49} 61}
50 62
@@ -159,8 +171,7 @@ void put_nilfs(struct the_nilfs *nilfs)
159 kfree(nilfs); 171 kfree(nilfs);
160} 172}
161 173
162static int nilfs_load_super_root(struct the_nilfs *nilfs, 174static int nilfs_load_super_root(struct the_nilfs *nilfs, sector_t sr_block)
163 struct nilfs_sb_info *sbi, sector_t sr_block)
164{ 175{
165 struct buffer_head *bh_sr; 176 struct buffer_head *bh_sr;
166 struct nilfs_super_root *raw_sr; 177 struct nilfs_super_root *raw_sr;
@@ -169,7 +180,7 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs,
169 unsigned inode_size; 180 unsigned inode_size;
170 int err; 181 int err;
171 182
172 err = nilfs_read_super_root_block(sbi->s_super, sr_block, &bh_sr, 1); 183 err = nilfs_read_super_root_block(nilfs, sr_block, &bh_sr, 1);
173 if (unlikely(err)) 184 if (unlikely(err))
174 return err; 185 return err;
175 186
@@ -248,6 +259,37 @@ static void nilfs_clear_recovery_info(struct nilfs_recovery_info *ri)
248} 259}
249 260
250/** 261/**
262 * nilfs_store_log_cursor - load log cursor from a super block
263 * @nilfs: nilfs object
264 * @sbp: buffer storing super block to be read
265 *
266 * nilfs_store_log_cursor() reads the last position of the log
267 * containing a super root from a given super block, and initializes
268 * relevant information on the nilfs object preparatory for log
269 * scanning and recovery.
270 */
271static int nilfs_store_log_cursor(struct the_nilfs *nilfs,
272 struct nilfs_super_block *sbp)
273{
274 int ret = 0;
275
276 nilfs->ns_last_pseg = le64_to_cpu(sbp->s_last_pseg);
277 nilfs->ns_last_cno = le64_to_cpu(sbp->s_last_cno);
278 nilfs->ns_last_seq = le64_to_cpu(sbp->s_last_seq);
279
280 nilfs->ns_prev_seq = nilfs->ns_last_seq;
281 nilfs->ns_seg_seq = nilfs->ns_last_seq;
282 nilfs->ns_segnum =
283 nilfs_get_segnum_of_block(nilfs, nilfs->ns_last_pseg);
284 nilfs->ns_cno = nilfs->ns_last_cno + 1;
285 if (nilfs->ns_segnum >= nilfs->ns_nsegments) {
286 printk(KERN_ERR "NILFS invalid last segment number.\n");
287 ret = -EINVAL;
288 }
289 return ret;
290}
291
292/**
251 * load_nilfs - load and recover the nilfs 293 * load_nilfs - load and recover the nilfs
252 * @nilfs: the_nilfs structure to be released 294 * @nilfs: the_nilfs structure to be released
253 * @sbi: nilfs_sb_info used to recover past segment 295 * @sbi: nilfs_sb_info used to recover past segment
@@ -285,13 +327,55 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
285 327
286 nilfs_init_recovery_info(&ri); 328 nilfs_init_recovery_info(&ri);
287 329
288 err = nilfs_search_super_root(nilfs, sbi, &ri); 330 err = nilfs_search_super_root(nilfs, &ri);
289 if (unlikely(err)) { 331 if (unlikely(err)) {
290 printk(KERN_ERR "NILFS: error searching super root.\n"); 332 struct nilfs_super_block **sbp = nilfs->ns_sbp;
291 goto failed; 333 int blocksize;
334
335 if (err != -EINVAL)
336 goto scan_error;
337
338 if (!nilfs_valid_sb(sbp[1])) {
339 printk(KERN_WARNING
340 "NILFS warning: unable to fall back to spare"
341 "super block\n");
342 goto scan_error;
343 }
344 printk(KERN_INFO
345 "NILFS: try rollback from an earlier position\n");
346
347 /*
348 * restore super block with its spare and reconfigure
349 * relevant states of the nilfs object.
350 */
351 memcpy(sbp[0], sbp[1], nilfs->ns_sbsize);
352 nilfs->ns_crc_seed = le32_to_cpu(sbp[0]->s_crc_seed);
353 nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime);
354
355 /* verify consistency between two super blocks */
356 blocksize = BLOCK_SIZE << le32_to_cpu(sbp[0]->s_log_block_size);
357 if (blocksize != nilfs->ns_blocksize) {
358 printk(KERN_WARNING
359 "NILFS warning: blocksize differs between "
360 "two super blocks (%d != %d)\n",
361 blocksize, nilfs->ns_blocksize);
362 goto scan_error;
363 }
364
365 err = nilfs_store_log_cursor(nilfs, sbp[0]);
366 if (err)
367 goto scan_error;
368
369 /* drop clean flag to allow roll-forward and recovery */
370 nilfs->ns_mount_state &= ~NILFS_VALID_FS;
371 valid_fs = 0;
372
373 err = nilfs_search_super_root(nilfs, &ri);
374 if (err)
375 goto scan_error;
292 } 376 }
293 377
294 err = nilfs_load_super_root(nilfs, sbi, ri.ri_super_root); 378 err = nilfs_load_super_root(nilfs, ri.ri_super_root);
295 if (unlikely(err)) { 379 if (unlikely(err)) {
296 printk(KERN_ERR "NILFS: error loading super root.\n"); 380 printk(KERN_ERR "NILFS: error loading super root.\n");
297 goto failed; 381 goto failed;
@@ -301,11 +385,23 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
301 goto skip_recovery; 385 goto skip_recovery;
302 386
303 if (s_flags & MS_RDONLY) { 387 if (s_flags & MS_RDONLY) {
388 __u64 features;
389
304 if (nilfs_test_opt(sbi, NORECOVERY)) { 390 if (nilfs_test_opt(sbi, NORECOVERY)) {
305 printk(KERN_INFO "NILFS: norecovery option specified. " 391 printk(KERN_INFO "NILFS: norecovery option specified. "
306 "skipping roll-forward recovery\n"); 392 "skipping roll-forward recovery\n");
307 goto skip_recovery; 393 goto skip_recovery;
308 } 394 }
395 features = le64_to_cpu(nilfs->ns_sbp[0]->s_feature_compat_ro) &
396 ~NILFS_FEATURE_COMPAT_RO_SUPP;
397 if (features) {
398 printk(KERN_ERR "NILFS: couldn't proceed with "
399 "recovery because of unsupported optional "
400 "features (%llx)\n",
401 (unsigned long long)features);
402 err = -EROFS;
403 goto failed_unload;
404 }
309 if (really_read_only) { 405 if (really_read_only) {
310 printk(KERN_ERR "NILFS: write access " 406 printk(KERN_ERR "NILFS: write access "
311 "unavailable, cannot proceed.\n"); 407 "unavailable, cannot proceed.\n");
@@ -320,14 +416,13 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
320 goto failed_unload; 416 goto failed_unload;
321 } 417 }
322 418
323 err = nilfs_recover_logical_segments(nilfs, sbi, &ri); 419 err = nilfs_salvage_orphan_logs(nilfs, sbi, &ri);
324 if (err) 420 if (err)
325 goto failed_unload; 421 goto failed_unload;
326 422
327 down_write(&nilfs->ns_sem); 423 down_write(&nilfs->ns_sem);
328 nilfs->ns_mount_state |= NILFS_VALID_FS; 424 nilfs->ns_mount_state |= NILFS_VALID_FS; /* set "clean" flag */
329 nilfs->ns_sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state); 425 err = nilfs_cleanup_super(sbi);
330 err = nilfs_commit_super(sbi, 1);
331 up_write(&nilfs->ns_sem); 426 up_write(&nilfs->ns_sem);
332 427
333 if (err) { 428 if (err) {
@@ -343,6 +438,10 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
343 sbi->s_super->s_flags = s_flags; 438 sbi->s_super->s_flags = s_flags;
344 return 0; 439 return 0;
345 440
441 scan_error:
442 printk(KERN_ERR "NILFS: error searching super root.\n");
443 goto failed;
444
346 failed_unload: 445 failed_unload:
347 nilfs_mdt_destroy(nilfs->ns_cpfile); 446 nilfs_mdt_destroy(nilfs->ns_cpfile);
348 nilfs_mdt_destroy(nilfs->ns_sufile); 447 nilfs_mdt_destroy(nilfs->ns_sufile);
@@ -515,8 +614,8 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
515 nilfs_swap_super_block(nilfs); 614 nilfs_swap_super_block(nilfs);
516 } 615 }
517 616
518 nilfs->ns_sbwtime[0] = le64_to_cpu(sbp[0]->s_wtime); 617 nilfs->ns_sbwcount = 0;
519 nilfs->ns_sbwtime[1] = valid[!swp] ? le64_to_cpu(sbp[1]->s_wtime) : 0; 618 nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime);
520 nilfs->ns_prot_seq = le64_to_cpu(sbp[valid[1] & !swp]->s_last_seq); 619 nilfs->ns_prot_seq = le64_to_cpu(sbp[valid[1] & !swp]->s_last_seq);
521 *sbpp = sbp[0]; 620 *sbpp = sbp[0];
522 return 0; 621 return 0;
@@ -557,6 +656,10 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data)
557 if (err) 656 if (err)
558 goto out; 657 goto out;
559 658
659 err = nilfs_check_feature_compatibility(sb, sbp);
660 if (err)
661 goto out;
662
560 blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); 663 blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size);
561 if (sb->s_blocksize != blocksize && 664 if (sb->s_blocksize != blocksize &&
562 !sb_set_blocksize(sb, blocksize)) { 665 !sb_set_blocksize(sb, blocksize)) {
@@ -568,7 +671,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data)
568 goto out; 671 goto out;
569 } 672 }
570 673
571 blocksize = sb_min_blocksize(sb, BLOCK_SIZE); 674 blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE);
572 if (!blocksize) { 675 if (!blocksize) {
573 printk(KERN_ERR "NILFS: unable to set blocksize\n"); 676 printk(KERN_ERR "NILFS: unable to set blocksize\n");
574 err = -EINVAL; 677 err = -EINVAL;
@@ -582,7 +685,18 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data)
582 if (err) 685 if (err)
583 goto failed_sbh; 686 goto failed_sbh;
584 687
688 err = nilfs_check_feature_compatibility(sb, sbp);
689 if (err)
690 goto failed_sbh;
691
585 blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); 692 blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size);
693 if (blocksize < NILFS_MIN_BLOCK_SIZE ||
694 blocksize > NILFS_MAX_BLOCK_SIZE) {
695 printk(KERN_ERR "NILFS: couldn't mount because of unsupported "
696 "filesystem blocksize %d\n", blocksize);
697 err = -EINVAL;
698 goto failed_sbh;
699 }
586 if (sb->s_blocksize != blocksize) { 700 if (sb->s_blocksize != blocksize) {
587 int hw_blocksize = bdev_logical_block_size(sb->s_bdev); 701 int hw_blocksize = bdev_logical_block_size(sb->s_bdev);
588 702
@@ -604,6 +718,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data)
604 when reloading fails. */ 718 when reloading fails. */
605 } 719 }
606 nilfs->ns_blocksize_bits = sb->s_blocksize_bits; 720 nilfs->ns_blocksize_bits = sb->s_blocksize_bits;
721 nilfs->ns_blocksize = blocksize;
607 722
608 err = nilfs_store_disk_layout(nilfs, sbp); 723 err = nilfs_store_disk_layout(nilfs, sbp);
609 if (err) 724 if (err)
@@ -616,23 +731,9 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data)
616 bdi = nilfs->ns_bdev->bd_inode->i_mapping->backing_dev_info; 731 bdi = nilfs->ns_bdev->bd_inode->i_mapping->backing_dev_info;
617 nilfs->ns_bdi = bdi ? : &default_backing_dev_info; 732 nilfs->ns_bdi = bdi ? : &default_backing_dev_info;
618 733
619 /* Finding last segment */ 734 err = nilfs_store_log_cursor(nilfs, sbp);
620 nilfs->ns_last_pseg = le64_to_cpu(sbp->s_last_pseg); 735 if (err)
621 nilfs->ns_last_cno = le64_to_cpu(sbp->s_last_cno);
622 nilfs->ns_last_seq = le64_to_cpu(sbp->s_last_seq);
623
624 nilfs->ns_seg_seq = nilfs->ns_last_seq;
625 nilfs->ns_segnum =
626 nilfs_get_segnum_of_block(nilfs, nilfs->ns_last_pseg);
627 nilfs->ns_cno = nilfs->ns_last_cno + 1;
628 if (nilfs->ns_segnum >= nilfs->ns_nsegments) {
629 printk(KERN_ERR "NILFS invalid last segment number.\n");
630 err = -EINVAL;
631 goto failed_sbh; 736 goto failed_sbh;
632 }
633 /* Dummy values */
634 nilfs->ns_free_segments_count =
635 nilfs->ns_nsegments - (nilfs->ns_segnum + 1);
636 737
637 /* Initialize gcinode cache */ 738 /* Initialize gcinode cache */
638 err = nilfs_init_gccache(nilfs); 739 err = nilfs_init_gccache(nilfs);
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index 1ab974533697..f785a7b0ab99 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -57,7 +57,8 @@ enum {
57 * @ns_current: back pointer to current mount 57 * @ns_current: back pointer to current mount
58 * @ns_sbh: buffer heads of on-disk super blocks 58 * @ns_sbh: buffer heads of on-disk super blocks
59 * @ns_sbp: pointers to super block data 59 * @ns_sbp: pointers to super block data
60 * @ns_sbwtime: previous write time of super blocks 60 * @ns_sbwtime: previous write time of super block
61 * @ns_sbwcount: write count of super block
61 * @ns_sbsize: size of valid data in super block 62 * @ns_sbsize: size of valid data in super block
62 * @ns_supers: list of nilfs super block structs 63 * @ns_supers: list of nilfs super block structs
63 * @ns_seg_seq: segment sequence counter 64 * @ns_seg_seq: segment sequence counter
@@ -73,7 +74,7 @@ enum {
73 * @ns_last_seq: sequence value of the latest segment 74 * @ns_last_seq: sequence value of the latest segment
74 * @ns_last_cno: checkpoint number of the latest segment 75 * @ns_last_cno: checkpoint number of the latest segment
75 * @ns_prot_seq: least sequence number of segments which must not be reclaimed 76 * @ns_prot_seq: least sequence number of segments which must not be reclaimed
76 * @ns_free_segments_count: counter of free segments 77 * @ns_prev_seq: base sequence number used to decide if advance log cursor
77 * @ns_segctor_sem: segment constructor semaphore 78 * @ns_segctor_sem: segment constructor semaphore
78 * @ns_dat: DAT file inode 79 * @ns_dat: DAT file inode
79 * @ns_cpfile: checkpoint file inode 80 * @ns_cpfile: checkpoint file inode
@@ -82,6 +83,7 @@ enum {
82 * @ns_gc_inodes: dummy inodes to keep live blocks 83 * @ns_gc_inodes: dummy inodes to keep live blocks
83 * @ns_gc_inodes_h: hash list to keep dummy inode holding live blocks 84 * @ns_gc_inodes_h: hash list to keep dummy inode holding live blocks
84 * @ns_blocksize_bits: bit length of block size 85 * @ns_blocksize_bits: bit length of block size
86 * @ns_blocksize: block size
85 * @ns_nsegments: number of segments in filesystem 87 * @ns_nsegments: number of segments in filesystem
86 * @ns_blocks_per_segment: number of blocks per segment 88 * @ns_blocks_per_segment: number of blocks per segment
87 * @ns_r_segments_percentage: reserved segments percentage 89 * @ns_r_segments_percentage: reserved segments percentage
@@ -119,7 +121,8 @@ struct the_nilfs {
119 */ 121 */
120 struct buffer_head *ns_sbh[2]; 122 struct buffer_head *ns_sbh[2];
121 struct nilfs_super_block *ns_sbp[2]; 123 struct nilfs_super_block *ns_sbp[2];
122 time_t ns_sbwtime[2]; 124 time_t ns_sbwtime;
125 unsigned ns_sbwcount;
123 unsigned ns_sbsize; 126 unsigned ns_sbsize;
124 unsigned ns_mount_state; 127 unsigned ns_mount_state;
125 128
@@ -149,7 +152,7 @@ struct the_nilfs {
149 u64 ns_last_seq; 152 u64 ns_last_seq;
150 __u64 ns_last_cno; 153 __u64 ns_last_cno;
151 u64 ns_prot_seq; 154 u64 ns_prot_seq;
152 unsigned long ns_free_segments_count; 155 u64 ns_prev_seq;
153 156
154 struct rw_semaphore ns_segctor_sem; 157 struct rw_semaphore ns_segctor_sem;
155 158
@@ -168,6 +171,7 @@ struct the_nilfs {
168 171
169 /* Disk layout information (static) */ 172 /* Disk layout information (static) */
170 unsigned int ns_blocksize_bits; 173 unsigned int ns_blocksize_bits;
174 unsigned int ns_blocksize;
171 unsigned long ns_nsegments; 175 unsigned long ns_nsegments;
172 unsigned long ns_blocks_per_segment; 176 unsigned long ns_blocks_per_segment;
173 unsigned long ns_r_segments_percentage; 177 unsigned long ns_r_segments_percentage;
@@ -203,20 +207,17 @@ THE_NILFS_FNS(SB_DIRTY, sb_dirty)
203 207
204/* Minimum interval of periodical update of superblocks (in seconds) */ 208/* Minimum interval of periodical update of superblocks (in seconds) */
205#define NILFS_SB_FREQ 10 209#define NILFS_SB_FREQ 10
206#define NILFS_ALTSB_FREQ 60 /* spare superblock */
207 210
208static inline int nilfs_sb_need_update(struct the_nilfs *nilfs) 211static inline int nilfs_sb_need_update(struct the_nilfs *nilfs)
209{ 212{
210 u64 t = get_seconds(); 213 u64 t = get_seconds();
211 return t < nilfs->ns_sbwtime[0] || 214 return t < nilfs->ns_sbwtime || t > nilfs->ns_sbwtime + NILFS_SB_FREQ;
212 t > nilfs->ns_sbwtime[0] + NILFS_SB_FREQ;
213} 215}
214 216
215static inline int nilfs_altsb_need_update(struct the_nilfs *nilfs) 217static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs)
216{ 218{
217 u64 t = get_seconds(); 219 int flip_bits = nilfs->ns_sbwcount & 0x0FL;
218 struct nilfs_super_block **sbp = nilfs->ns_sbp; 220 return (flip_bits != 0x08 && flip_bits != 0x0F);
219 return sbp[1] && t > nilfs->ns_sbwtime[1] + NILFS_ALTSB_FREQ;
220} 221}
221 222
222void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); 223void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64);
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 37b460f302b7..33297c005060 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -244,11 +244,11 @@ void fsnotify_unmount_inodes(struct list_head *list)
244 struct inode *need_iput_tmp; 244 struct inode *need_iput_tmp;
245 245
246 /* 246 /*
247 * We cannot __iget() an inode in state I_CLEAR, I_FREEING, 247 * We cannot __iget() an inode in state I_FREEING,
248 * I_WILL_FREE, or I_NEW which is fine because by that point 248 * I_WILL_FREE, or I_NEW which is fine because by that point
249 * the inode cannot have any associated watches. 249 * the inode cannot have any associated watches.
250 */ 250 */
251 if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW)) 251 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
252 continue; 252 continue;
253 253
254 /* 254 /*
@@ -272,7 +272,7 @@ void fsnotify_unmount_inodes(struct list_head *list)
272 /* In case the dropping of a reference would nuke next_i. */ 272 /* In case the dropping of a reference would nuke next_i. */
273 if ((&next_i->i_sb_list != list) && 273 if ((&next_i->i_sb_list != list) &&
274 atomic_read(&next_i->i_count) && 274 atomic_read(&next_i->i_count) &&
275 !(next_i->i_state & (I_CLEAR | I_FREEING | I_WILL_FREE))) { 275 !(next_i->i_state & (I_FREEING | I_WILL_FREE))) {
276 __iget(next_i); 276 __iget(next_i);
277 need_iput = next_i; 277 need_iput = next_i;
278 } 278 }
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 4b57fb1eac2a..93622b175fc7 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2238,7 +2238,7 @@ void ntfs_clear_extent_inode(ntfs_inode *ni)
2238} 2238}
2239 2239
2240/** 2240/**
2241 * ntfs_clear_big_inode - clean up the ntfs specific part of an inode 2241 * ntfs_evict_big_inode - clean up the ntfs specific part of an inode
2242 * @vi: vfs inode pending annihilation 2242 * @vi: vfs inode pending annihilation
2243 * 2243 *
2244 * When the VFS is going to remove an inode from memory, ntfs_clear_big_inode() 2244 * When the VFS is going to remove an inode from memory, ntfs_clear_big_inode()
@@ -2247,10 +2247,13 @@ void ntfs_clear_extent_inode(ntfs_inode *ni)
2247 * 2247 *
2248 * If the MFT record is dirty, we commit it before doing anything else. 2248 * If the MFT record is dirty, we commit it before doing anything else.
2249 */ 2249 */
2250void ntfs_clear_big_inode(struct inode *vi) 2250void ntfs_evict_big_inode(struct inode *vi)
2251{ 2251{
2252 ntfs_inode *ni = NTFS_I(vi); 2252 ntfs_inode *ni = NTFS_I(vi);
2253 2253
2254 truncate_inode_pages(&vi->i_data, 0);
2255 end_writeback(vi);
2256
2254#ifdef NTFS_RW 2257#ifdef NTFS_RW
2255 if (NInoDirty(ni)) { 2258 if (NInoDirty(ni)) {
2256 bool was_bad = (is_bad_inode(vi)); 2259 bool was_bad = (is_bad_inode(vi));
@@ -2879,9 +2882,6 @@ void ntfs_truncate_vfs(struct inode *vi) {
2879 * 2882 *
2880 * Called with ->i_mutex held. For the ATTR_SIZE (i.e. ->truncate) case, also 2883 * Called with ->i_mutex held. For the ATTR_SIZE (i.e. ->truncate) case, also
2881 * called with ->i_alloc_sem held for writing. 2884 * called with ->i_alloc_sem held for writing.
2882 *
2883 * Basically this is a copy of generic notify_change() and inode_setattr()
2884 * functionality, except we intercept and abort changes in i_size.
2885 */ 2885 */
2886int ntfs_setattr(struct dentry *dentry, struct iattr *attr) 2886int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
2887{ 2887{
diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h
index 9a113544605d..2dabf813456c 100644
--- a/fs/ntfs/inode.h
+++ b/fs/ntfs/inode.h
@@ -279,7 +279,7 @@ extern struct inode *ntfs_index_iget(struct inode *base_vi, ntfschar *name,
279 279
280extern struct inode *ntfs_alloc_big_inode(struct super_block *sb); 280extern struct inode *ntfs_alloc_big_inode(struct super_block *sb);
281extern void ntfs_destroy_big_inode(struct inode *inode); 281extern void ntfs_destroy_big_inode(struct inode *inode);
282extern void ntfs_clear_big_inode(struct inode *vi); 282extern void ntfs_evict_big_inode(struct inode *vi);
283 283
284extern void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni); 284extern void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni);
285 285
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 0de1db6cddbf..512806171bfa 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -2700,7 +2700,7 @@ static const struct super_operations ntfs_sops = {
2700 .put_super = ntfs_put_super, /* Syscall: umount. */ 2700 .put_super = ntfs_put_super, /* Syscall: umount. */
2701 .statfs = ntfs_statfs, /* Syscall: statfs */ 2701 .statfs = ntfs_statfs, /* Syscall: statfs */
2702 .remount_fs = ntfs_remount, /* Syscall: mount -o remount. */ 2702 .remount_fs = ntfs_remount, /* Syscall: mount -o remount. */
2703 .clear_inode = ntfs_clear_big_inode, /* VFS: Called when an inode is 2703 .evict_inode = ntfs_evict_big_inode, /* VFS: Called when an inode is
2704 removed from memory. */ 2704 removed from memory. */
2705 //.umount_begin = NULL, /* Forced umount. */ 2705 //.umount_begin = NULL, /* Forced umount. */
2706 .show_options = ntfs_show_options, /* Show mount options in 2706 .show_options = ntfs_show_options, /* Show mount options in
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 356e976772bf..0de69c9a08be 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -578,7 +578,9 @@ bail:
578static void ocfs2_dio_end_io(struct kiocb *iocb, 578static void ocfs2_dio_end_io(struct kiocb *iocb,
579 loff_t offset, 579 loff_t offset,
580 ssize_t bytes, 580 ssize_t bytes,
581 void *private) 581 void *private,
582 int ret,
583 bool is_async)
582{ 584{
583 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; 585 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
584 int level; 586 int level;
@@ -592,6 +594,9 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
592 if (!level) 594 if (!level)
593 up_read(&inode->i_alloc_sem); 595 up_read(&inode->i_alloc_sem);
594 ocfs2_rw_unlock(inode, level); 596 ocfs2_rw_unlock(inode, level);
597
598 if (is_async)
599 aio_complete(iocb, ret, 0);
595} 600}
596 601
597/* 602/*
@@ -638,11 +643,10 @@ static ssize_t ocfs2_direct_IO(int rw,
638 if (i_size_read(inode) <= offset) 643 if (i_size_read(inode) <= offset)
639 return 0; 644 return 0;
640 645
641 ret = blockdev_direct_IO_no_locking(rw, iocb, inode, 646 ret = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
642 inode->i_sb->s_bdev, iov, offset, 647 iov, offset, nr_segs,
643 nr_segs, 648 ocfs2_direct_IO_get_blocks,
644 ocfs2_direct_IO_get_blocks, 649 ocfs2_dio_end_io, NULL, 0);
645 ocfs2_dio_end_io);
646 650
647 mlog_exit(ret); 651 mlog_exit(ret);
648 return ret; 652 return ret;
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 0cd24cf54396..5efdd37dfe48 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -419,7 +419,7 @@ static loff_t debug_buffer_llseek(struct file *file, loff_t off, int whence)
419 419
420static int debug_buffer_release(struct inode *inode, struct file *file) 420static int debug_buffer_release(struct inode *inode, struct file *file)
421{ 421{
422 struct debug_buffer *db = (struct debug_buffer *)file->private_data; 422 struct debug_buffer *db = file->private_data;
423 423
424 if (db) 424 if (db)
425 kfree(db->buf); 425 kfree(db->buf);
@@ -715,7 +715,7 @@ static int debug_lockres_open(struct inode *inode, struct file *file)
715 goto bail; 715 goto bail;
716 } 716 }
717 717
718 seq = (struct seq_file *) file->private_data; 718 seq = file->private_data;
719 seq->private = dl; 719 seq->private = dl;
720 720
721 dlm_grab(dlm); 721 dlm_grab(dlm);
@@ -731,7 +731,7 @@ bail:
731 731
732static int debug_lockres_release(struct inode *inode, struct file *file) 732static int debug_lockres_release(struct inode *inode, struct file *file)
733{ 733{
734 struct seq_file *seq = (struct seq_file *)file->private_data; 734 struct seq_file *seq = file->private_data;
735 struct debug_lockres *dl = (struct debug_lockres *)seq->private; 735 struct debug_lockres *dl = (struct debug_lockres *)seq->private;
736 736
737 if (dl->dl_res) 737 if (dl->dl_res)
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index b83d6107a1f5..c2903b84bb7a 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -182,8 +182,7 @@ static int dlmfs_file_release(struct inode *inode,
182{ 182{
183 int level, status; 183 int level, status;
184 struct dlmfs_inode_private *ip = DLMFS_I(inode); 184 struct dlmfs_inode_private *ip = DLMFS_I(inode);
185 struct dlmfs_filp_private *fp = 185 struct dlmfs_filp_private *fp = file->private_data;
186 (struct dlmfs_filp_private *) file->private_data;
187 186
188 if (S_ISDIR(inode->i_mode)) 187 if (S_ISDIR(inode->i_mode))
189 BUG(); 188 BUG();
@@ -214,10 +213,12 @@ static int dlmfs_file_setattr(struct dentry *dentry, struct iattr *attr)
214 213
215 attr->ia_valid &= ~ATTR_SIZE; 214 attr->ia_valid &= ~ATTR_SIZE;
216 error = inode_change_ok(inode, attr); 215 error = inode_change_ok(inode, attr);
217 if (!error) 216 if (error)
218 error = inode_setattr(inode, attr); 217 return error;
219 218
220 return error; 219 setattr_copy(inode, attr);
220 mark_inode_dirty(inode);
221 return 0;
221} 222}
222 223
223static unsigned int dlmfs_file_poll(struct file *file, poll_table *wait) 224static unsigned int dlmfs_file_poll(struct file *file, poll_table *wait)
@@ -355,13 +356,12 @@ static void dlmfs_destroy_inode(struct inode *inode)
355 kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode)); 356 kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode));
356} 357}
357 358
358static void dlmfs_clear_inode(struct inode *inode) 359static void dlmfs_evict_inode(struct inode *inode)
359{ 360{
360 int status; 361 int status;
361 struct dlmfs_inode_private *ip; 362 struct dlmfs_inode_private *ip;
362 363
363 if (!inode) 364 end_writeback(inode);
364 return;
365 365
366 mlog(0, "inode %lu\n", inode->i_ino); 366 mlog(0, "inode %lu\n", inode->i_ino);
367 367
@@ -631,7 +631,7 @@ static const struct super_operations dlmfs_ops = {
631 .statfs = simple_statfs, 631 .statfs = simple_statfs,
632 .alloc_inode = dlmfs_alloc_inode, 632 .alloc_inode = dlmfs_alloc_inode,
633 .destroy_inode = dlmfs_destroy_inode, 633 .destroy_inode = dlmfs_destroy_inode,
634 .clear_inode = dlmfs_clear_inode, 634 .evict_inode = dlmfs_evict_inode,
635 .drop_inode = generic_delete_inode, 635 .drop_inode = generic_delete_inode,
636}; 636};
637 637
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 39eb16ac5f98..5e02a893f46e 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2966,7 +2966,7 @@ static const struct seq_operations ocfs2_dlm_seq_ops = {
2966 2966
2967static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file) 2967static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file)
2968{ 2968{
2969 struct seq_file *seq = (struct seq_file *) file->private_data; 2969 struct seq_file *seq = file->private_data;
2970 struct ocfs2_dlm_seq_priv *priv = seq->private; 2970 struct ocfs2_dlm_seq_priv *priv = seq->private;
2971 struct ocfs2_lock_res *res = &priv->p_iter_res; 2971 struct ocfs2_lock_res *res = &priv->p_iter_res;
2972 2972
@@ -3000,7 +3000,7 @@ static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file)
3000 goto out; 3000 goto out;
3001 } 3001 }
3002 3002
3003 seq = (struct seq_file *) file->private_data; 3003 seq = file->private_data;
3004 seq->private = priv; 3004 seq->private = priv;
3005 3005
3006 ocfs2_add_lockres_tracking(&priv->p_iter_res, 3006 ocfs2_add_lockres_tracking(&priv->p_iter_res,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 2b10b36d1577..81296b4e3646 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1233,18 +1233,26 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1233 } 1233 }
1234 1234
1235 /* 1235 /*
1236 * This will intentionally not wind up calling simple_setsize(), 1236 * This will intentionally not wind up calling truncate_setsize(),
1237 * since all the work for a size change has been done above. 1237 * since all the work for a size change has been done above.
1238 * Otherwise, we could get into problems with truncate as 1238 * Otherwise, we could get into problems with truncate as
1239 * ip_alloc_sem is used there to protect against i_size 1239 * ip_alloc_sem is used there to protect against i_size
1240 * changes. 1240 * changes.
1241 *
1242 * XXX: this means the conditional below can probably be removed.
1241 */ 1243 */
1242 status = inode_setattr(inode, attr); 1244 if ((attr->ia_valid & ATTR_SIZE) &&
1243 if (status < 0) { 1245 attr->ia_size != i_size_read(inode)) {
1244 mlog_errno(status); 1246 status = vmtruncate(inode, attr->ia_size);
1245 goto bail_commit; 1247 if (status) {
1248 mlog_errno(status);
1249 goto bail_commit;
1250 }
1246 } 1251 }
1247 1252
1253 setattr_copy(inode, attr);
1254 mark_inode_dirty(inode);
1255
1248 status = ocfs2_mark_inode_dirty(handle, inode, bh); 1256 status = ocfs2_mark_inode_dirty(handle, inode, bh);
1249 if (status < 0) 1257 if (status < 0)
1250 mlog_errno(status); 1258 mlog_errno(status);
@@ -2300,12 +2308,12 @@ relock:
2300 * blocks outside i_size. Trim these off again. 2308 * blocks outside i_size. Trim these off again.
2301 * Don't need i_size_read because we hold i_mutex. 2309 * Don't need i_size_read because we hold i_mutex.
2302 * 2310 *
2303 * XXX(hch): this looks buggy because ocfs2 did not 2311 * XXX(truncate): this looks buggy because ocfs2 did not
2304 * actually implement ->truncate. Take a look at 2312 * actually implement ->truncate. Take a look at
2305 * the new truncate sequence and update this accordingly 2313 * the new truncate sequence and update this accordingly
2306 */ 2314 */
2307 if (*ppos + count > inode->i_size) 2315 if (*ppos + count > inode->i_size)
2308 simple_setsize(inode, inode->i_size); 2316 truncate_setsize(inode, inode->i_size);
2309 ret = written; 2317 ret = written;
2310 goto out_dio; 2318 goto out_dio;
2311 } 2319 }
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index abb0a95cc717..0492464916b1 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -969,7 +969,7 @@ static void ocfs2_cleanup_delete_inode(struct inode *inode,
969 truncate_inode_pages(&inode->i_data, 0); 969 truncate_inode_pages(&inode->i_data, 0);
970} 970}
971 971
972void ocfs2_delete_inode(struct inode *inode) 972static void ocfs2_delete_inode(struct inode *inode)
973{ 973{
974 int wipe, status; 974 int wipe, status;
975 sigset_t oldset; 975 sigset_t oldset;
@@ -1075,20 +1075,17 @@ bail_unlock_nfs_sync:
1075bail_unblock: 1075bail_unblock:
1076 ocfs2_unblock_signals(&oldset); 1076 ocfs2_unblock_signals(&oldset);
1077bail: 1077bail:
1078 clear_inode(inode);
1079 mlog_exit_void(); 1078 mlog_exit_void();
1080} 1079}
1081 1080
1082void ocfs2_clear_inode(struct inode *inode) 1081static void ocfs2_clear_inode(struct inode *inode)
1083{ 1082{
1084 int status; 1083 int status;
1085 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1084 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1086 1085
1087 mlog_entry_void(); 1086 mlog_entry_void();
1088 1087
1089 if (!inode) 1088 end_writeback(inode);
1090 goto bail;
1091
1092 mlog(0, "Clearing inode: %llu, nlink = %u\n", 1089 mlog(0, "Clearing inode: %llu, nlink = %u\n",
1093 (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_nlink); 1090 (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_nlink);
1094 1091
@@ -1180,16 +1177,27 @@ void ocfs2_clear_inode(struct inode *inode)
1180 jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal, 1177 jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal,
1181 &oi->ip_jinode); 1178 &oi->ip_jinode);
1182 1179
1183bail:
1184 mlog_exit_void(); 1180 mlog_exit_void();
1185} 1181}
1186 1182
1183void ocfs2_evict_inode(struct inode *inode)
1184{
1185 if (!inode->i_nlink ||
1186 (OCFS2_I(inode)->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)) {
1187 ocfs2_delete_inode(inode);
1188 } else {
1189 truncate_inode_pages(&inode->i_data, 0);
1190 }
1191 ocfs2_clear_inode(inode);
1192}
1193
1187/* Called under inode_lock, with no more references on the 1194/* Called under inode_lock, with no more references on the
1188 * struct inode, so it's safe here to check the flags field 1195 * struct inode, so it's safe here to check the flags field
1189 * and to manipulate i_nlink without any other locks. */ 1196 * and to manipulate i_nlink without any other locks. */
1190void ocfs2_drop_inode(struct inode *inode) 1197int ocfs2_drop_inode(struct inode *inode)
1191{ 1198{
1192 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1199 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1200 int res;
1193 1201
1194 mlog_entry_void(); 1202 mlog_entry_void();
1195 1203
@@ -1197,11 +1205,12 @@ void ocfs2_drop_inode(struct inode *inode)
1197 (unsigned long long)oi->ip_blkno, inode->i_nlink, oi->ip_flags); 1205 (unsigned long long)oi->ip_blkno, inode->i_nlink, oi->ip_flags);
1198 1206
1199 if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) 1207 if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)
1200 generic_delete_inode(inode); 1208 res = 1;
1201 else 1209 else
1202 generic_drop_inode(inode); 1210 res = generic_drop_inode(inode);
1203 1211
1204 mlog_exit_void(); 1212 mlog_exit_void();
1213 return res;
1205} 1214}
1206 1215
1207/* 1216/*
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 9f5f5fcadc45..6de5a869db30 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -123,9 +123,8 @@ static inline struct ocfs2_caching_info *INODE_CACHE(struct inode *inode)
123 return &OCFS2_I(inode)->ip_metadata_cache; 123 return &OCFS2_I(inode)->ip_metadata_cache;
124} 124}
125 125
126void ocfs2_clear_inode(struct inode *inode); 126void ocfs2_evict_inode(struct inode *inode);
127void ocfs2_delete_inode(struct inode *inode); 127int ocfs2_drop_inode(struct inode *inode);
128void ocfs2_drop_inode(struct inode *inode);
129 128
130/* Flags for ocfs2_iget() */ 129/* Flags for ocfs2_iget() */
131#define OCFS2_FI_FLAG_SYSFILE 0x1 130#define OCFS2_FI_FLAG_SYSFILE 0x1
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 625de9d7088c..9b57c0350ff9 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -760,13 +760,13 @@ void ocfs2_set_journal_params(struct ocfs2_super *osb)
760 if (osb->osb_commit_interval) 760 if (osb->osb_commit_interval)
761 commit_interval = osb->osb_commit_interval; 761 commit_interval = osb->osb_commit_interval;
762 762
763 spin_lock(&journal->j_state_lock); 763 write_lock(&journal->j_state_lock);
764 journal->j_commit_interval = commit_interval; 764 journal->j_commit_interval = commit_interval;
765 if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) 765 if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
766 journal->j_flags |= JBD2_BARRIER; 766 journal->j_flags |= JBD2_BARRIER;
767 else 767 else
768 journal->j_flags &= ~JBD2_BARRIER; 768 journal->j_flags &= ~JBD2_BARRIER;
769 spin_unlock(&journal->j_state_lock); 769 write_unlock(&journal->j_state_lock);
770} 770}
771 771
772int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) 772int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 0eaa929a4dbf..fa1be1b304d1 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -145,8 +145,7 @@ static const struct super_operations ocfs2_sops = {
145 .alloc_inode = ocfs2_alloc_inode, 145 .alloc_inode = ocfs2_alloc_inode,
146 .destroy_inode = ocfs2_destroy_inode, 146 .destroy_inode = ocfs2_destroy_inode,
147 .drop_inode = ocfs2_drop_inode, 147 .drop_inode = ocfs2_drop_inode,
148 .clear_inode = ocfs2_clear_inode, 148 .evict_inode = ocfs2_evict_inode,
149 .delete_inode = ocfs2_delete_inode,
150 .sync_fs = ocfs2_sync_fs, 149 .sync_fs = ocfs2_sync_fs,
151 .put_super = ocfs2_put_super, 150 .put_super = ocfs2_put_super,
152 .remount_fs = ocfs2_remount, 151 .remount_fs = ocfs2_remount,
@@ -2472,7 +2471,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
2472 kfree(osb->slot_recovery_generations); 2471 kfree(osb->slot_recovery_generations);
2473 /* FIXME 2472 /* FIXME
2474 * This belongs in journal shutdown, but because we have to 2473 * This belongs in journal shutdown, but because we have to
2475 * allocate osb->journal at the start of ocfs2_initalize_osb(), 2474 * allocate osb->journal at the start of ocfs2_initialize_osb(),
2476 * we free it here. 2475 * we free it here.
2477 */ 2476 */
2478 kfree(osb->journal); 2477 kfree(osb->journal);
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 6e7a3291bbe8..5542c284dc1c 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -312,9 +312,17 @@ static int omfs_write_begin(struct file *file, struct address_space *mapping,
312 loff_t pos, unsigned len, unsigned flags, 312 loff_t pos, unsigned len, unsigned flags,
313 struct page **pagep, void **fsdata) 313 struct page **pagep, void **fsdata)
314{ 314{
315 *pagep = NULL; 315 int ret;
316 return block_write_begin(file, mapping, pos, len, flags, 316
317 pagep, fsdata, omfs_get_block); 317 ret = block_write_begin(mapping, pos, len, flags, pagep,
318 omfs_get_block);
319 if (unlikely(ret)) {
320 loff_t isize = mapping->host->i_size;
321 if (pos + len > isize)
322 vmtruncate(mapping->host, isize);
323 }
324
325 return ret;
318} 326}
319 327
320static sector_t omfs_bmap(struct address_space *mapping, sector_t block) 328static sector_t omfs_bmap(struct address_space *mapping, sector_t block)
@@ -333,7 +341,29 @@ const struct file_operations omfs_file_operations = {
333 .splice_read = generic_file_splice_read, 341 .splice_read = generic_file_splice_read,
334}; 342};
335 343
344static int omfs_setattr(struct dentry *dentry, struct iattr *attr)
345{
346 struct inode *inode = dentry->d_inode;
347 int error;
348
349 error = inode_change_ok(inode, attr);
350 if (error)
351 return error;
352
353 if ((attr->ia_valid & ATTR_SIZE) &&
354 attr->ia_size != i_size_read(inode)) {
355 error = vmtruncate(inode, attr->ia_size);
356 if (error)
357 return error;
358 }
359
360 setattr_copy(inode, attr);
361 mark_inode_dirty(inode);
362 return 0;
363}
364
336const struct inode_operations omfs_file_inops = { 365const struct inode_operations omfs_file_inops = {
366 .setattr = omfs_setattr,
337 .truncate = omfs_truncate 367 .truncate = omfs_truncate
338}; 368};
339 369
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index 089839a6cc64..56121debc22b 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -175,9 +175,13 @@ int omfs_sync_inode(struct inode *inode)
175 * called when an entry is deleted, need to clear the bits in the 175 * called when an entry is deleted, need to clear the bits in the
176 * bitmaps. 176 * bitmaps.
177 */ 177 */
178static void omfs_delete_inode(struct inode *inode) 178static void omfs_evict_inode(struct inode *inode)
179{ 179{
180 truncate_inode_pages(&inode->i_data, 0); 180 truncate_inode_pages(&inode->i_data, 0);
181 end_writeback(inode);
182
183 if (inode->i_nlink)
184 return;
181 185
182 if (S_ISREG(inode->i_mode)) { 186 if (S_ISREG(inode->i_mode)) {
183 inode->i_size = 0; 187 inode->i_size = 0;
@@ -185,7 +189,6 @@ static void omfs_delete_inode(struct inode *inode)
185 } 189 }
186 190
187 omfs_clear_range(inode->i_sb, inode->i_ino, 2); 191 omfs_clear_range(inode->i_sb, inode->i_ino, 2);
188 clear_inode(inode);
189} 192}
190 193
191struct inode *omfs_iget(struct super_block *sb, ino_t ino) 194struct inode *omfs_iget(struct super_block *sb, ino_t ino)
@@ -284,7 +287,7 @@ static int omfs_statfs(struct dentry *dentry, struct kstatfs *buf)
284 287
285static const struct super_operations omfs_sops = { 288static const struct super_operations omfs_sops = {
286 .write_inode = omfs_write_inode, 289 .write_inode = omfs_write_inode,
287 .delete_inode = omfs_delete_inode, 290 .evict_inode = omfs_evict_inode,
288 .put_super = omfs_put_super, 291 .put_super = omfs_put_super,
289 .statfs = omfs_statfs, 292 .statfs = omfs_statfs,
290 .show_options = generic_show_options, 293 .show_options = generic_show_options,
diff --git a/fs/open.c b/fs/open.c
index bf082635e257..b715d06fbe36 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -111,7 +111,7 @@ static long do_sys_truncate(const char __user *pathname, loff_t length)
111 111
112 error = locks_verify_truncate(inode, NULL, length); 112 error = locks_verify_truncate(inode, NULL, length);
113 if (!error) 113 if (!error)
114 error = security_path_truncate(&path, length, 0); 114 error = security_path_truncate(&path);
115 if (!error) 115 if (!error)
116 error = do_truncate(path.dentry, length, 0, NULL); 116 error = do_truncate(path.dentry, length, 0, NULL);
117 117
@@ -166,8 +166,7 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
166 166
167 error = locks_verify_truncate(inode, file, length); 167 error = locks_verify_truncate(inode, file, length);
168 if (!error) 168 if (!error)
169 error = security_path_truncate(&file->f_path, length, 169 error = security_path_truncate(&file->f_path);
170 ATTR_MTIME|ATTR_CTIME);
171 if (!error) 170 if (!error)
172 error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); 171 error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file);
173out_putf: 172out_putf:
@@ -368,7 +367,7 @@ SYSCALL_DEFINE1(chdir, const char __user *, filename)
368 if (error) 367 if (error)
369 goto out; 368 goto out;
370 369
371 error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS); 370 error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
372 if (error) 371 if (error)
373 goto dput_and_out; 372 goto dput_and_out;
374 373
@@ -397,7 +396,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
397 if (!S_ISDIR(inode->i_mode)) 396 if (!S_ISDIR(inode->i_mode))
398 goto out_putf; 397 goto out_putf;
399 398
400 error = inode_permission(inode, MAY_EXEC | MAY_ACCESS); 399 error = inode_permission(inode, MAY_EXEC | MAY_CHDIR);
401 if (!error) 400 if (!error)
402 set_fs_pwd(current->fs, &file->f_path); 401 set_fs_pwd(current->fs, &file->f_path);
403out_putf: 402out_putf:
@@ -415,7 +414,7 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename)
415 if (error) 414 if (error)
416 goto out; 415 goto out;
417 416
418 error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS); 417 error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
419 if (error) 418 if (error)
420 goto dput_and_out; 419 goto dput_and_out;
421 420
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 5dcd4b0c5533..72c52656dc2e 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -459,7 +459,6 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
459 } 459 }
460 460
461 /* everything is up and running, commence */ 461 /* everything is up and running, commence */
462 INIT_RCU_HEAD(&p->rcu_head);
463 rcu_assign_pointer(ptbl->part[partno], p); 462 rcu_assign_pointer(ptbl->part[partno], p);
464 463
465 /* suppress uevent if the disk supresses it */ 464 /* suppress uevent if the disk supresses it */
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 9b58d38bc911..fff6572676ae 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -176,7 +176,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
176 if (tracer) 176 if (tracer)
177 tpid = task_pid_nr_ns(tracer, ns); 177 tpid = task_pid_nr_ns(tracer, ns);
178 } 178 }
179 cred = get_cred((struct cred *) __task_cred(p)); 179 cred = get_task_cred(p);
180 seq_printf(m, 180 seq_printf(m,
181 "State:\t%s\n" 181 "State:\t%s\n"
182 "Tgid:\t%d\n" 182 "Tgid:\t%d\n"
diff --git a/fs/proc/base.c b/fs/proc/base.c
index acb7ef80ea4f..c806dfb24e08 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -63,6 +63,7 @@
63#include <linux/namei.h> 63#include <linux/namei.h>
64#include <linux/mnt_namespace.h> 64#include <linux/mnt_namespace.h>
65#include <linux/mm.h> 65#include <linux/mm.h>
66#include <linux/swap.h>
66#include <linux/rcupdate.h> 67#include <linux/rcupdate.h>
67#include <linux/kallsyms.h> 68#include <linux/kallsyms.h>
68#include <linux/stacktrace.h> 69#include <linux/stacktrace.h>
@@ -427,17 +428,14 @@ static const struct file_operations proc_lstats_operations = {
427 428
428#endif 429#endif
429 430
430/* The badness from the OOM killer */
431unsigned long badness(struct task_struct *p, unsigned long uptime);
432static int proc_oom_score(struct task_struct *task, char *buffer) 431static int proc_oom_score(struct task_struct *task, char *buffer)
433{ 432{
434 unsigned long points = 0; 433 unsigned long points = 0;
435 struct timespec uptime;
436 434
437 do_posix_clock_monotonic_gettime(&uptime);
438 read_lock(&tasklist_lock); 435 read_lock(&tasklist_lock);
439 if (pid_alive(task)) 436 if (pid_alive(task))
440 points = badness(task, uptime.tv_sec); 437 points = oom_badness(task, NULL, NULL,
438 totalram_pages + total_swap_pages);
441 read_unlock(&tasklist_lock); 439 read_unlock(&tasklist_lock);
442 return sprintf(buffer, "%lu\n", points); 440 return sprintf(buffer, "%lu\n", points);
443} 441}
@@ -561,9 +559,19 @@ static int proc_setattr(struct dentry *dentry, struct iattr *attr)
561 return -EPERM; 559 return -EPERM;
562 560
563 error = inode_change_ok(inode, attr); 561 error = inode_change_ok(inode, attr);
564 if (!error) 562 if (error)
565 error = inode_setattr(inode, attr); 563 return error;
566 return error; 564
565 if ((attr->ia_valid & ATTR_SIZE) &&
566 attr->ia_size != i_size_read(inode)) {
567 error = vmtruncate(inode, attr->ia_size);
568 if (error)
569 return error;
570 }
571
572 setattr_copy(inode, attr);
573 mark_inode_dirty(inode);
574 return 0;
567} 575}
568 576
569static const struct inode_operations proc_def_inode_operations = { 577static const struct inode_operations proc_def_inode_operations = {
@@ -1039,8 +1047,24 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1039 return -EACCES; 1047 return -EACCES;
1040 } 1048 }
1041 1049
1050 /*
1051 * Warn that /proc/pid/oom_adj is deprecated, see
1052 * Documentation/feature-removal-schedule.txt.
1053 */
1054 printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, "
1055 "please use /proc/%d/oom_score_adj instead.\n",
1056 current->comm, task_pid_nr(current),
1057 task_pid_nr(task), task_pid_nr(task));
1042 task->signal->oom_adj = oom_adjust; 1058 task->signal->oom_adj = oom_adjust;
1043 1059 /*
1060 * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
1061 * value is always attainable.
1062 */
1063 if (task->signal->oom_adj == OOM_ADJUST_MAX)
1064 task->signal->oom_score_adj = OOM_SCORE_ADJ_MAX;
1065 else
1066 task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) /
1067 -OOM_DISABLE;
1044 unlock_task_sighand(task, &flags); 1068 unlock_task_sighand(task, &flags);
1045 put_task_struct(task); 1069 put_task_struct(task);
1046 1070
@@ -1053,6 +1077,82 @@ static const struct file_operations proc_oom_adjust_operations = {
1053 .llseek = generic_file_llseek, 1077 .llseek = generic_file_llseek,
1054}; 1078};
1055 1079
1080static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
1081 size_t count, loff_t *ppos)
1082{
1083 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
1084 char buffer[PROC_NUMBUF];
1085 int oom_score_adj = OOM_SCORE_ADJ_MIN;
1086 unsigned long flags;
1087 size_t len;
1088
1089 if (!task)
1090 return -ESRCH;
1091 if (lock_task_sighand(task, &flags)) {
1092 oom_score_adj = task->signal->oom_score_adj;
1093 unlock_task_sighand(task, &flags);
1094 }
1095 put_task_struct(task);
1096 len = snprintf(buffer, sizeof(buffer), "%d\n", oom_score_adj);
1097 return simple_read_from_buffer(buf, count, ppos, buffer, len);
1098}
1099
1100static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
1101 size_t count, loff_t *ppos)
1102{
1103 struct task_struct *task;
1104 char buffer[PROC_NUMBUF];
1105 unsigned long flags;
1106 long oom_score_adj;
1107 int err;
1108
1109 memset(buffer, 0, sizeof(buffer));
1110 if (count > sizeof(buffer) - 1)
1111 count = sizeof(buffer) - 1;
1112 if (copy_from_user(buffer, buf, count))
1113 return -EFAULT;
1114
1115 err = strict_strtol(strstrip(buffer), 0, &oom_score_adj);
1116 if (err)
1117 return -EINVAL;
1118 if (oom_score_adj < OOM_SCORE_ADJ_MIN ||
1119 oom_score_adj > OOM_SCORE_ADJ_MAX)
1120 return -EINVAL;
1121
1122 task = get_proc_task(file->f_path.dentry->d_inode);
1123 if (!task)
1124 return -ESRCH;
1125 if (!lock_task_sighand(task, &flags)) {
1126 put_task_struct(task);
1127 return -ESRCH;
1128 }
1129 if (oom_score_adj < task->signal->oom_score_adj &&
1130 !capable(CAP_SYS_RESOURCE)) {
1131 unlock_task_sighand(task, &flags);
1132 put_task_struct(task);
1133 return -EACCES;
1134 }
1135
1136 task->signal->oom_score_adj = oom_score_adj;
1137 /*
1138 * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is
1139 * always attainable.
1140 */
1141 if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
1142 task->signal->oom_adj = OOM_DISABLE;
1143 else
1144 task->signal->oom_adj = (oom_score_adj * OOM_ADJUST_MAX) /
1145 OOM_SCORE_ADJ_MAX;
1146 unlock_task_sighand(task, &flags);
1147 put_task_struct(task);
1148 return count;
1149}
1150
1151static const struct file_operations proc_oom_score_adj_operations = {
1152 .read = oom_score_adj_read,
1153 .write = oom_score_adj_write,
1154};
1155
1056#ifdef CONFIG_AUDITSYSCALL 1156#ifdef CONFIG_AUDITSYSCALL
1057#define TMPBUFLEN 21 1157#define TMPBUFLEN 21
1058static ssize_t proc_loginuid_read(struct file * file, char __user * buf, 1158static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
@@ -2625,6 +2725,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2625#endif 2725#endif
2626 INF("oom_score", S_IRUGO, proc_oom_score), 2726 INF("oom_score", S_IRUGO, proc_oom_score),
2627 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), 2727 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations),
2728 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
2628#ifdef CONFIG_AUDITSYSCALL 2729#ifdef CONFIG_AUDITSYSCALL
2629 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), 2730 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
2630 REG("sessionid", S_IRUGO, proc_sessionid_operations), 2731 REG("sessionid", S_IRUGO, proc_sessionid_operations),
@@ -2959,6 +3060,7 @@ static const struct pid_entry tid_base_stuff[] = {
2959#endif 3060#endif
2960 INF("oom_score", S_IRUGO, proc_oom_score), 3061 INF("oom_score", S_IRUGO, proc_oom_score),
2961 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), 3062 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations),
3063 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
2962#ifdef CONFIG_AUDITSYSCALL 3064#ifdef CONFIG_AUDITSYSCALL
2963 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), 3065 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
2964 REG("sessionid", S_IRUSR, proc_sessionid_operations), 3066 REG("sessionid", S_IRUSR, proc_sessionid_operations),
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 2791907744ed..dd29f0337661 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -12,6 +12,7 @@
12#include <linux/time.h> 12#include <linux/time.h>
13#include <linux/proc_fs.h> 13#include <linux/proc_fs.h>
14#include <linux/stat.h> 14#include <linux/stat.h>
15#include <linux/mm.h>
15#include <linux/module.h> 16#include <linux/module.h>
16#include <linux/slab.h> 17#include <linux/slab.h>
17#include <linux/mount.h> 18#include <linux/mount.h>
@@ -258,17 +259,22 @@ static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
258 259
259 error = inode_change_ok(inode, iattr); 260 error = inode_change_ok(inode, iattr);
260 if (error) 261 if (error)
261 goto out; 262 return error;
262 263
263 error = inode_setattr(inode, iattr); 264 if ((iattr->ia_valid & ATTR_SIZE) &&
264 if (error) 265 iattr->ia_size != i_size_read(inode)) {
265 goto out; 266 error = vmtruncate(inode, iattr->ia_size);
267 if (error)
268 return error;
269 }
270
271 setattr_copy(inode, iattr);
272 mark_inode_dirty(inode);
266 273
267 de->uid = inode->i_uid; 274 de->uid = inode->i_uid;
268 de->gid = inode->i_gid; 275 de->gid = inode->i_gid;
269 de->mode = inode->i_mode; 276 de->mode = inode->i_mode;
270out: 277 return 0;
271 return error;
272} 278}
273 279
274static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry, 280static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry,
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index aea8502e58a3..23561cda7245 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -25,11 +25,12 @@
25 25
26#include "internal.h" 26#include "internal.h"
27 27
28static void proc_delete_inode(struct inode *inode) 28static void proc_evict_inode(struct inode *inode)
29{ 29{
30 struct proc_dir_entry *de; 30 struct proc_dir_entry *de;
31 31
32 truncate_inode_pages(&inode->i_data, 0); 32 truncate_inode_pages(&inode->i_data, 0);
33 end_writeback(inode);
33 34
34 /* Stop tracking associated processes */ 35 /* Stop tracking associated processes */
35 put_pid(PROC_I(inode)->pid); 36 put_pid(PROC_I(inode)->pid);
@@ -40,7 +41,6 @@ static void proc_delete_inode(struct inode *inode)
40 pde_put(de); 41 pde_put(de);
41 if (PROC_I(inode)->sysctl) 42 if (PROC_I(inode)->sysctl)
42 sysctl_head_put(PROC_I(inode)->sysctl); 43 sysctl_head_put(PROC_I(inode)->sysctl);
43 clear_inode(inode);
44} 44}
45 45
46struct vfsmount *proc_mnt; 46struct vfsmount *proc_mnt;
@@ -91,7 +91,7 @@ static const struct super_operations proc_sops = {
91 .alloc_inode = proc_alloc_inode, 91 .alloc_inode = proc_alloc_inode,
92 .destroy_inode = proc_destroy_inode, 92 .destroy_inode = proc_destroy_inode,
93 .drop_inode = generic_delete_inode, 93 .drop_inode = generic_delete_inode,
94 .delete_inode = proc_delete_inode, 94 .evict_inode = proc_evict_inode,
95 .statfs = simple_statfs, 95 .statfs = simple_statfs,
96}; 96};
97 97
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 6ff9981f0a18..5be436ea088e 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -329,10 +329,19 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
329 return -EPERM; 329 return -EPERM;
330 330
331 error = inode_change_ok(inode, attr); 331 error = inode_change_ok(inode, attr);
332 if (!error) 332 if (error)
333 error = inode_setattr(inode, attr); 333 return error;
334
335 if ((attr->ia_valid & ATTR_SIZE) &&
336 attr->ia_size != i_size_read(inode)) {
337 error = vmtruncate(inode, attr->ia_size);
338 if (error)
339 return error;
340 }
334 341
335 return error; 342 setattr_copy(inode, attr);
343 mark_inode_dirty(inode);
344 return 0;
336} 345}
337 346
338static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 347static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 277575ddc05c..16829722be93 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -320,10 +320,19 @@ static int qnx4_write_begin(struct file *file, struct address_space *mapping,
320 struct page **pagep, void **fsdata) 320 struct page **pagep, void **fsdata)
321{ 321{
322 struct qnx4_inode_info *qnx4_inode = qnx4_i(mapping->host); 322 struct qnx4_inode_info *qnx4_inode = qnx4_i(mapping->host);
323 int ret;
324
323 *pagep = NULL; 325 *pagep = NULL;
324 return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 326 ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
325 qnx4_get_block, 327 qnx4_get_block,
326 &qnx4_inode->mmu_private); 328 &qnx4_inode->mmu_private);
329 if (unlikely(ret)) {
330 loff_t isize = mapping->host->i_size;
331 if (pos + len > isize)
332 vmtruncate(mapping->host, isize);
333 }
334
335 return ret;
327} 336}
328static sector_t qnx4_bmap(struct address_space *mapping, sector_t block) 337static sector_t qnx4_bmap(struct address_space *mapping, sector_t block)
329{ 338{
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 437d2ca2de97..aad1316a977f 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -132,6 +132,22 @@ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock);
132__cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock); 132__cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock);
133EXPORT_SYMBOL(dq_data_lock); 133EXPORT_SYMBOL(dq_data_lock);
134 134
135void __quota_error(struct super_block *sb, const char *func,
136 const char *fmt, ...)
137{
138 va_list args;
139
140 if (printk_ratelimit()) {
141 va_start(args, fmt);
142 printk(KERN_ERR "Quota error (device %s): %s: ",
143 sb->s_id, func);
144 vprintk(fmt, args);
145 printk("\n");
146 va_end(args);
147 }
148}
149EXPORT_SYMBOL(__quota_error);
150
135#if defined(CONFIG_QUOTA_DEBUG) || defined(CONFIG_PRINT_QUOTA_WARNING) 151#if defined(CONFIG_QUOTA_DEBUG) || defined(CONFIG_PRINT_QUOTA_WARNING)
136static char *quotatypes[] = INITQFNAMES; 152static char *quotatypes[] = INITQFNAMES;
137#endif 153#endif
@@ -705,11 +721,8 @@ void dqput(struct dquot *dquot)
705 return; 721 return;
706#ifdef CONFIG_QUOTA_DEBUG 722#ifdef CONFIG_QUOTA_DEBUG
707 if (!atomic_read(&dquot->dq_count)) { 723 if (!atomic_read(&dquot->dq_count)) {
708 printk("VFS: dqput: trying to free free dquot\n"); 724 quota_error(dquot->dq_sb, "trying to free free dquot of %s %d",
709 printk("VFS: device %s, dquot of %s %d\n", 725 quotatypes[dquot->dq_type], dquot->dq_id);
710 dquot->dq_sb->s_id,
711 quotatypes[dquot->dq_type],
712 dquot->dq_id);
713 BUG(); 726 BUG();
714 } 727 }
715#endif 728#endif
@@ -732,9 +745,9 @@ we_slept:
732 /* Commit dquot before releasing */ 745 /* Commit dquot before releasing */
733 ret = dquot->dq_sb->dq_op->write_dquot(dquot); 746 ret = dquot->dq_sb->dq_op->write_dquot(dquot);
734 if (ret < 0) { 747 if (ret < 0) {
735 printk(KERN_ERR "VFS: cannot write quota structure on " 748 quota_error(dquot->dq_sb, "Can't write quota structure"
736 "device %s (error %d). Quota may get out of " 749 " (error %d). Quota may get out of sync!",
737 "sync!\n", dquot->dq_sb->s_id, ret); 750 ret);
738 /* 751 /*
739 * We clear dirty bit anyway, so that we avoid 752 * We clear dirty bit anyway, so that we avoid
740 * infinite loop here 753 * infinite loop here
@@ -885,7 +898,7 @@ static void add_dquot_ref(struct super_block *sb, int type)
885 898
886 spin_lock(&inode_lock); 899 spin_lock(&inode_lock);
887 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 900 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
888 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) 901 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
889 continue; 902 continue;
890#ifdef CONFIG_QUOTA_DEBUG 903#ifdef CONFIG_QUOTA_DEBUG
891 if (unlikely(inode_get_rsv_space(inode) > 0)) 904 if (unlikely(inode_get_rsv_space(inode) > 0))
@@ -914,9 +927,9 @@ static void add_dquot_ref(struct super_block *sb, int type)
914 927
915#ifdef CONFIG_QUOTA_DEBUG 928#ifdef CONFIG_QUOTA_DEBUG
916 if (reserved) { 929 if (reserved) {
917 printk(KERN_WARNING "VFS (%s): Writes happened before quota" 930 quota_error(sb, "Writes happened before quota was turned on "
918 " was turned on thus quota information is probably " 931 "thus quota information is probably inconsistent. "
919 "inconsistent. Please run quotacheck(8).\n", sb->s_id); 932 "Please run quotacheck(8)");
920 } 933 }
921#endif 934#endif
922} 935}
@@ -947,7 +960,9 @@ static int remove_inode_dquot_ref(struct inode *inode, int type,
947 if (dqput_blocks(dquot)) { 960 if (dqput_blocks(dquot)) {
948#ifdef CONFIG_QUOTA_DEBUG 961#ifdef CONFIG_QUOTA_DEBUG
949 if (atomic_read(&dquot->dq_count) != 1) 962 if (atomic_read(&dquot->dq_count) != 1)
950 printk(KERN_WARNING "VFS: Adding dquot with dq_count %d to dispose list.\n", atomic_read(&dquot->dq_count)); 963 quota_error(inode->i_sb, "Adding dquot with "
964 "dq_count %d to dispose list",
965 atomic_read(&dquot->dq_count));
951#endif 966#endif
952 spin_lock(&dq_list_lock); 967 spin_lock(&dq_list_lock);
953 /* As dquot must have currently users it can't be on 968 /* As dquot must have currently users it can't be on
@@ -986,6 +1001,7 @@ static void remove_dquot_ref(struct super_block *sb, int type,
986 struct list_head *tofree_head) 1001 struct list_head *tofree_head)
987{ 1002{
988 struct inode *inode; 1003 struct inode *inode;
1004 int reserved = 0;
989 1005
990 spin_lock(&inode_lock); 1006 spin_lock(&inode_lock);
991 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 1007 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
@@ -995,10 +1011,20 @@ static void remove_dquot_ref(struct super_block *sb, int type,
995 * only quota pointers and these have separate locking 1011 * only quota pointers and these have separate locking
996 * (dqptr_sem). 1012 * (dqptr_sem).
997 */ 1013 */
998 if (!IS_NOQUOTA(inode)) 1014 if (!IS_NOQUOTA(inode)) {
1015 if (unlikely(inode_get_rsv_space(inode) > 0))
1016 reserved = 1;
999 remove_inode_dquot_ref(inode, type, tofree_head); 1017 remove_inode_dquot_ref(inode, type, tofree_head);
1018 }
1000 } 1019 }
1001 spin_unlock(&inode_lock); 1020 spin_unlock(&inode_lock);
1021#ifdef CONFIG_QUOTA_DEBUG
1022 if (reserved) {
1023 printk(KERN_WARNING "VFS (%s): Writes happened after quota"
1024 " was disabled thus quota information is probably "
1025 "inconsistent. Please run quotacheck(8).\n", sb->s_id);
1026 }
1027#endif
1002} 1028}
1003 1029
1004/* Gather all references from inodes and drop them */ 1030/* Gather all references from inodes and drop them */
@@ -1304,6 +1330,15 @@ static int info_bdq_free(struct dquot *dquot, qsize_t space)
1304 return QUOTA_NL_NOWARN; 1330 return QUOTA_NL_NOWARN;
1305} 1331}
1306 1332
1333static int dquot_active(const struct inode *inode)
1334{
1335 struct super_block *sb = inode->i_sb;
1336
1337 if (IS_NOQUOTA(inode))
1338 return 0;
1339 return sb_any_quota_loaded(sb) & ~sb_any_quota_suspended(sb);
1340}
1341
1307/* 1342/*
1308 * Initialize quota pointers in inode 1343 * Initialize quota pointers in inode
1309 * 1344 *
@@ -1323,7 +1358,7 @@ static void __dquot_initialize(struct inode *inode, int type)
1323 1358
1324 /* First test before acquiring mutex - solves deadlocks when we 1359 /* First test before acquiring mutex - solves deadlocks when we
1325 * re-enter the quota code and are already holding the mutex */ 1360 * re-enter the quota code and are already holding the mutex */
1326 if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) 1361 if (!dquot_active(inode))
1327 return; 1362 return;
1328 1363
1329 /* First get references to structures we might need. */ 1364 /* First get references to structures we might need. */
@@ -1507,7 +1542,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
1507 * First test before acquiring mutex - solves deadlocks when we 1542 * First test before acquiring mutex - solves deadlocks when we
1508 * re-enter the quota code and are already holding the mutex 1543 * re-enter the quota code and are already holding the mutex
1509 */ 1544 */
1510 if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) { 1545 if (!dquot_active(inode)) {
1511 inode_incr_space(inode, number, reserve); 1546 inode_incr_space(inode, number, reserve);
1512 goto out; 1547 goto out;
1513 } 1548 }
@@ -1559,7 +1594,7 @@ int dquot_alloc_inode(const struct inode *inode)
1559 1594
1560 /* First test before acquiring mutex - solves deadlocks when we 1595 /* First test before acquiring mutex - solves deadlocks when we
1561 * re-enter the quota code and are already holding the mutex */ 1596 * re-enter the quota code and are already holding the mutex */
1562 if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) 1597 if (!dquot_active(inode))
1563 return 0; 1598 return 0;
1564 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1599 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1565 warntype[cnt] = QUOTA_NL_NOWARN; 1600 warntype[cnt] = QUOTA_NL_NOWARN;
@@ -1596,7 +1631,7 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
1596{ 1631{
1597 int cnt; 1632 int cnt;
1598 1633
1599 if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) { 1634 if (!dquot_active(inode)) {
1600 inode_claim_rsv_space(inode, number); 1635 inode_claim_rsv_space(inode, number);
1601 return 0; 1636 return 0;
1602 } 1637 }
@@ -1629,7 +1664,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
1629 1664
1630 /* First test before acquiring mutex - solves deadlocks when we 1665 /* First test before acquiring mutex - solves deadlocks when we
1631 * re-enter the quota code and are already holding the mutex */ 1666 * re-enter the quota code and are already holding the mutex */
1632 if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) { 1667 if (!dquot_active(inode)) {
1633 inode_decr_space(inode, number, reserve); 1668 inode_decr_space(inode, number, reserve);
1634 return; 1669 return;
1635 } 1670 }
@@ -1667,7 +1702,7 @@ void dquot_free_inode(const struct inode *inode)
1667 1702
1668 /* First test before acquiring mutex - solves deadlocks when we 1703 /* First test before acquiring mutex - solves deadlocks when we
1669 * re-enter the quota code and are already holding the mutex */ 1704 * re-enter the quota code and are already holding the mutex */
1670 if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) 1705 if (!dquot_active(inode))
1671 return; 1706 return;
1672 1707
1673 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1708 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
@@ -1790,7 +1825,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
1790 struct super_block *sb = inode->i_sb; 1825 struct super_block *sb = inode->i_sb;
1791 int ret; 1826 int ret;
1792 1827
1793 if (!sb_any_quota_active(sb) || IS_NOQUOTA(inode)) 1828 if (!dquot_active(inode))
1794 return 0; 1829 return 0;
1795 1830
1796 if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) 1831 if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid)
@@ -1957,7 +1992,7 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags)
1957 truncate_inode_pages(&toputinode[cnt]->i_data, 1992 truncate_inode_pages(&toputinode[cnt]->i_data,
1958 0); 1993 0);
1959 mutex_unlock(&toputinode[cnt]->i_mutex); 1994 mutex_unlock(&toputinode[cnt]->i_mutex);
1960 mark_inode_dirty(toputinode[cnt]); 1995 mark_inode_dirty_sync(toputinode[cnt]);
1961 } 1996 }
1962 mutex_unlock(&dqopt->dqonoff_mutex); 1997 mutex_unlock(&dqopt->dqonoff_mutex);
1963 } 1998 }
@@ -2270,7 +2305,7 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
2270 memset(di, 0, sizeof(*di)); 2305 memset(di, 0, sizeof(*di));
2271 di->d_version = FS_DQUOT_VERSION; 2306 di->d_version = FS_DQUOT_VERSION;
2272 di->d_flags = dquot->dq_type == USRQUOTA ? 2307 di->d_flags = dquot->dq_type == USRQUOTA ?
2273 XFS_USER_QUOTA : XFS_GROUP_QUOTA; 2308 FS_USER_QUOTA : FS_GROUP_QUOTA;
2274 di->d_id = dquot->dq_id; 2309 di->d_id = dquot->dq_id;
2275 2310
2276 spin_lock(&dq_data_lock); 2311 spin_lock(&dq_data_lock);
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index 24f03407eeb5..9e48874eabcc 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -65,8 +65,7 @@ static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf)
65 ret = sb->s_op->quota_write(sb, info->dqi_type, buf, 65 ret = sb->s_op->quota_write(sb, info->dqi_type, buf,
66 info->dqi_usable_bs, blk << info->dqi_blocksize_bits); 66 info->dqi_usable_bs, blk << info->dqi_blocksize_bits);
67 if (ret != info->dqi_usable_bs) { 67 if (ret != info->dqi_usable_bs) {
68 q_warn(KERN_WARNING "VFS: dquota write failed on " 68 quota_error(sb, "dquota write failed");
69 "dev %s\n", sb->s_id);
70 if (ret >= 0) 69 if (ret >= 0)
71 ret = -EIO; 70 ret = -EIO;
72 } 71 }
@@ -160,9 +159,8 @@ static int remove_free_dqentry(struct qtree_mem_dqinfo *info, char *buf,
160 dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0); 159 dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
161 /* No matter whether write succeeds block is out of list */ 160 /* No matter whether write succeeds block is out of list */
162 if (write_blk(info, blk, buf) < 0) 161 if (write_blk(info, blk, buf) < 0)
163 q_warn(KERN_ERR 162 quota_error(info->dqi_sb, "Can't write block (%u) "
164 "VFS: Can't write block (%u) with free entries.\n", 163 "with free entries", blk);
165 blk);
166 return 0; 164 return 0;
167out_buf: 165out_buf:
168 kfree(tmpbuf); 166 kfree(tmpbuf);
@@ -252,9 +250,8 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info,
252 if (le16_to_cpu(dh->dqdh_entries) + 1 >= qtree_dqstr_in_blk(info)) { 250 if (le16_to_cpu(dh->dqdh_entries) + 1 >= qtree_dqstr_in_blk(info)) {
253 *err = remove_free_dqentry(info, buf, blk); 251 *err = remove_free_dqentry(info, buf, blk);
254 if (*err < 0) { 252 if (*err < 0) {
255 q_warn(KERN_ERR "VFS: find_free_dqentry(): Can't " 253 quota_error(dquot->dq_sb, "Can't remove block (%u) "
256 "remove block (%u) from entry free list.\n", 254 "from entry free list", blk);
257 blk);
258 goto out_buf; 255 goto out_buf;
259 } 256 }
260 } 257 }
@@ -268,16 +265,15 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info,
268 } 265 }
269#ifdef __QUOTA_QT_PARANOIA 266#ifdef __QUOTA_QT_PARANOIA
270 if (i == qtree_dqstr_in_blk(info)) { 267 if (i == qtree_dqstr_in_blk(info)) {
271 printk(KERN_ERR "VFS: find_free_dqentry(): Data block full " 268 quota_error(dquot->dq_sb, "Data block full but it shouldn't");
272 "but it shouldn't.\n");
273 *err = -EIO; 269 *err = -EIO;
274 goto out_buf; 270 goto out_buf;
275 } 271 }
276#endif 272#endif
277 *err = write_blk(info, blk, buf); 273 *err = write_blk(info, blk, buf);
278 if (*err < 0) { 274 if (*err < 0) {
279 q_warn(KERN_ERR "VFS: find_free_dqentry(): Can't write quota " 275 quota_error(dquot->dq_sb, "Can't write quota data block %u",
280 "data block %u.\n", blk); 276 blk);
281 goto out_buf; 277 goto out_buf;
282 } 278 }
283 dquot->dq_off = (blk << info->dqi_blocksize_bits) + 279 dquot->dq_off = (blk << info->dqi_blocksize_bits) +
@@ -311,8 +307,8 @@ static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
311 } else { 307 } else {
312 ret = read_blk(info, *treeblk, buf); 308 ret = read_blk(info, *treeblk, buf);
313 if (ret < 0) { 309 if (ret < 0) {
314 q_warn(KERN_ERR "VFS: Can't read tree quota block " 310 quota_error(dquot->dq_sb, "Can't read tree quota "
315 "%u.\n", *treeblk); 311 "block %u", *treeblk);
316 goto out_buf; 312 goto out_buf;
317 } 313 }
318 } 314 }
@@ -323,9 +319,9 @@ static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
323 if (depth == info->dqi_qtree_depth - 1) { 319 if (depth == info->dqi_qtree_depth - 1) {
324#ifdef __QUOTA_QT_PARANOIA 320#ifdef __QUOTA_QT_PARANOIA
325 if (newblk) { 321 if (newblk) {
326 printk(KERN_ERR "VFS: Inserting already present quota " 322 quota_error(dquot->dq_sb, "Inserting already present "
327 "entry (block %u).\n", 323 "quota entry (block %u)",
328 le32_to_cpu(ref[get_index(info, 324 le32_to_cpu(ref[get_index(info,
329 dquot->dq_id, depth)])); 325 dquot->dq_id, depth)]));
330 ret = -EIO; 326 ret = -EIO;
331 goto out_buf; 327 goto out_buf;
@@ -373,8 +369,8 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
373 if (!dquot->dq_off) { 369 if (!dquot->dq_off) {
374 ret = dq_insert_tree(info, dquot); 370 ret = dq_insert_tree(info, dquot);
375 if (ret < 0) { 371 if (ret < 0) {
376 q_warn(KERN_ERR "VFS: Error %zd occurred while " 372 quota_error(sb, "Error %zd occurred while creating "
377 "creating quota.\n", ret); 373 "quota", ret);
378 kfree(ddquot); 374 kfree(ddquot);
379 return ret; 375 return ret;
380 } 376 }
@@ -385,8 +381,7 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
385 ret = sb->s_op->quota_write(sb, type, ddquot, info->dqi_entry_size, 381 ret = sb->s_op->quota_write(sb, type, ddquot, info->dqi_entry_size,
386 dquot->dq_off); 382 dquot->dq_off);
387 if (ret != info->dqi_entry_size) { 383 if (ret != info->dqi_entry_size) {
388 q_warn(KERN_WARNING "VFS: dquota write failed on dev %s\n", 384 quota_error(sb, "dquota write failed");
389 sb->s_id);
390 if (ret >= 0) 385 if (ret >= 0)
391 ret = -ENOSPC; 386 ret = -ENOSPC;
392 } else { 387 } else {
@@ -410,14 +405,15 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot,
410 if (!buf) 405 if (!buf)
411 return -ENOMEM; 406 return -ENOMEM;
412 if (dquot->dq_off >> info->dqi_blocksize_bits != blk) { 407 if (dquot->dq_off >> info->dqi_blocksize_bits != blk) {
413 q_warn(KERN_ERR "VFS: Quota structure has offset to other " 408 quota_error(dquot->dq_sb, "Quota structure has offset to "
414 "block (%u) than it should (%u).\n", blk, 409 "other block (%u) than it should (%u)", blk,
415 (uint)(dquot->dq_off >> info->dqi_blocksize_bits)); 410 (uint)(dquot->dq_off >> info->dqi_blocksize_bits));
416 goto out_buf; 411 goto out_buf;
417 } 412 }
418 ret = read_blk(info, blk, buf); 413 ret = read_blk(info, blk, buf);
419 if (ret < 0) { 414 if (ret < 0) {
420 q_warn(KERN_ERR "VFS: Can't read quota data block %u\n", blk); 415 quota_error(dquot->dq_sb, "Can't read quota data block %u",
416 blk);
421 goto out_buf; 417 goto out_buf;
422 } 418 }
423 dh = (struct qt_disk_dqdbheader *)buf; 419 dh = (struct qt_disk_dqdbheader *)buf;
@@ -427,8 +423,8 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot,
427 if (ret >= 0) 423 if (ret >= 0)
428 ret = put_free_dqblk(info, buf, blk); 424 ret = put_free_dqblk(info, buf, blk);
429 if (ret < 0) { 425 if (ret < 0) {
430 q_warn(KERN_ERR "VFS: Can't move quota data block (%u) " 426 quota_error(dquot->dq_sb, "Can't move quota data block "
431 "to free list.\n", blk); 427 "(%u) to free list", blk);
432 goto out_buf; 428 goto out_buf;
433 } 429 }
434 } else { 430 } else {
@@ -440,15 +436,15 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot,
440 /* Insert will write block itself */ 436 /* Insert will write block itself */
441 ret = insert_free_dqentry(info, buf, blk); 437 ret = insert_free_dqentry(info, buf, blk);
442 if (ret < 0) { 438 if (ret < 0) {
443 q_warn(KERN_ERR "VFS: Can't insert quota data " 439 quota_error(dquot->dq_sb, "Can't insert quota "
444 "block (%u) to free entry list.\n", blk); 440 "data block (%u) to free entry list", blk);
445 goto out_buf; 441 goto out_buf;
446 } 442 }
447 } else { 443 } else {
448 ret = write_blk(info, blk, buf); 444 ret = write_blk(info, blk, buf);
449 if (ret < 0) { 445 if (ret < 0) {
450 q_warn(KERN_ERR "VFS: Can't write quota data " 446 quota_error(dquot->dq_sb, "Can't write quota "
451 "block %u\n", blk); 447 "data block %u", blk);
452 goto out_buf; 448 goto out_buf;
453 } 449 }
454 } 450 }
@@ -472,7 +468,8 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
472 return -ENOMEM; 468 return -ENOMEM;
473 ret = read_blk(info, *blk, buf); 469 ret = read_blk(info, *blk, buf);
474 if (ret < 0) { 470 if (ret < 0) {
475 q_warn(KERN_ERR "VFS: Can't read quota data block %u\n", *blk); 471 quota_error(dquot->dq_sb, "Can't read quota data "
472 "block %u", blk);
476 goto out_buf; 473 goto out_buf;
477 } 474 }
478 newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]); 475 newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
@@ -496,8 +493,8 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
496 } else { 493 } else {
497 ret = write_blk(info, *blk, buf); 494 ret = write_blk(info, *blk, buf);
498 if (ret < 0) 495 if (ret < 0)
499 q_warn(KERN_ERR "VFS: Can't write quota tree " 496 quota_error(dquot->dq_sb, "Can't write quota "
500 "block %u.\n", *blk); 497 "tree block %u", blk);
501 } 498 }
502 } 499 }
503out_buf: 500out_buf:
@@ -529,7 +526,8 @@ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info,
529 return -ENOMEM; 526 return -ENOMEM;
530 ret = read_blk(info, blk, buf); 527 ret = read_blk(info, blk, buf);
531 if (ret < 0) { 528 if (ret < 0) {
532 q_warn(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk); 529 quota_error(dquot->dq_sb, "Can't read quota tree "
530 "block %u", blk);
533 goto out_buf; 531 goto out_buf;
534 } 532 }
535 ddquot = buf + sizeof(struct qt_disk_dqdbheader); 533 ddquot = buf + sizeof(struct qt_disk_dqdbheader);
@@ -539,8 +537,8 @@ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info,
539 ddquot += info->dqi_entry_size; 537 ddquot += info->dqi_entry_size;
540 } 538 }
541 if (i == qtree_dqstr_in_blk(info)) { 539 if (i == qtree_dqstr_in_blk(info)) {
542 q_warn(KERN_ERR "VFS: Quota for id %u referenced " 540 quota_error(dquot->dq_sb, "Quota for id %u referenced "
543 "but not present.\n", dquot->dq_id); 541 "but not present", dquot->dq_id);
544 ret = -EIO; 542 ret = -EIO;
545 goto out_buf; 543 goto out_buf;
546 } else { 544 } else {
@@ -564,7 +562,8 @@ static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info,
564 return -ENOMEM; 562 return -ENOMEM;
565 ret = read_blk(info, blk, buf); 563 ret = read_blk(info, blk, buf);
566 if (ret < 0) { 564 if (ret < 0) {
567 q_warn(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk); 565 quota_error(dquot->dq_sb, "Can't read quota tree block %u",
566 blk);
568 goto out_buf; 567 goto out_buf;
569 } 568 }
570 ret = 0; 569 ret = 0;
@@ -598,7 +597,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
598#ifdef __QUOTA_QT_PARANOIA 597#ifdef __QUOTA_QT_PARANOIA
599 /* Invalidated quota? */ 598 /* Invalidated quota? */
600 if (!sb_dqopt(dquot->dq_sb)->files[type]) { 599 if (!sb_dqopt(dquot->dq_sb)->files[type]) {
601 printk(KERN_ERR "VFS: Quota invalidated while reading!\n"); 600 quota_error(sb, "Quota invalidated while reading!");
602 return -EIO; 601 return -EIO;
603 } 602 }
604#endif 603#endif
@@ -607,8 +606,8 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
607 offset = find_dqentry(info, dquot); 606 offset = find_dqentry(info, dquot);
608 if (offset <= 0) { /* Entry not present? */ 607 if (offset <= 0) { /* Entry not present? */
609 if (offset < 0) 608 if (offset < 0)
610 q_warn(KERN_ERR "VFS: Can't read quota " 609 quota_error(sb, "Can't read quota structure "
611 "structure for id %u.\n", dquot->dq_id); 610 "for id %u", dquot->dq_id);
612 dquot->dq_off = 0; 611 dquot->dq_off = 0;
613 set_bit(DQ_FAKE_B, &dquot->dq_flags); 612 set_bit(DQ_FAKE_B, &dquot->dq_flags);
614 memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); 613 memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
@@ -625,8 +624,8 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
625 if (ret != info->dqi_entry_size) { 624 if (ret != info->dqi_entry_size) {
626 if (ret >= 0) 625 if (ret >= 0)
627 ret = -EIO; 626 ret = -EIO;
628 q_warn(KERN_ERR "VFS: Error while reading quota " 627 quota_error(sb, "Error while reading quota structure for id %u",
629 "structure for id %u.\n", dquot->dq_id); 628 dquot->dq_id);
630 set_bit(DQ_FAKE_B, &dquot->dq_flags); 629 set_bit(DQ_FAKE_B, &dquot->dq_flags);
631 memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); 630 memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
632 kfree(ddquot); 631 kfree(ddquot);
diff --git a/fs/quota/quota_tree.h b/fs/quota/quota_tree.h
index ccc3e71fb1d8..a1ab8db81a51 100644
--- a/fs/quota/quota_tree.h
+++ b/fs/quota/quota_tree.h
@@ -22,10 +22,4 @@ struct qt_disk_dqdbheader {
22 22
23#define QT_TREEOFF 1 /* Offset of tree in file in blocks */ 23#define QT_TREEOFF 1 /* Offset of tree in file in blocks */
24 24
25#define q_warn(fmt, args...) \
26do { \
27 if (printk_ratelimit()) \
28 printk(fmt, ## args); \
29} while(0)
30
31#endif /* _LINUX_QUOTAIO_TREE_H */ 25#endif /* _LINUX_QUOTAIO_TREE_H */
diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c
index 4af344c5852a..34b37a67bb16 100644
--- a/fs/quota/quota_v1.c
+++ b/fs/quota/quota_v1.c
@@ -95,8 +95,7 @@ static int v1_commit_dqblk(struct dquot *dquot)
95 (char *)&dqblk, sizeof(struct v1_disk_dqblk), 95 (char *)&dqblk, sizeof(struct v1_disk_dqblk),
96 v1_dqoff(dquot->dq_id)); 96 v1_dqoff(dquot->dq_id));
97 if (ret != sizeof(struct v1_disk_dqblk)) { 97 if (ret != sizeof(struct v1_disk_dqblk)) {
98 printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", 98 quota_error(dquot->dq_sb, "dquota write failed");
99 dquot->dq_sb->s_id);
100 if (ret >= 0) 99 if (ret >= 0)
101 ret = -EIO; 100 ret = -EIO;
102 goto out; 101 goto out;
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index 135206af1458..65444d29406b 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -63,9 +63,8 @@ static int v2_read_header(struct super_block *sb, int type,
63 size = sb->s_op->quota_read(sb, type, (char *)dqhead, 63 size = sb->s_op->quota_read(sb, type, (char *)dqhead,
64 sizeof(struct v2_disk_dqheader), 0); 64 sizeof(struct v2_disk_dqheader), 0);
65 if (size != sizeof(struct v2_disk_dqheader)) { 65 if (size != sizeof(struct v2_disk_dqheader)) {
66 q_warn(KERN_WARNING "quota_v2: Failed header read:" 66 quota_error(sb, "Failed header read: expected=%zd got=%zd",
67 " expected=%zd got=%zd\n", 67 sizeof(struct v2_disk_dqheader), size);
68 sizeof(struct v2_disk_dqheader), size);
69 return 0; 68 return 0;
70 } 69 }
71 return 1; 70 return 1;
@@ -106,8 +105,7 @@ static int v2_read_file_info(struct super_block *sb, int type)
106 size = sb->s_op->quota_read(sb, type, (char *)&dinfo, 105 size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
107 sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); 106 sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
108 if (size != sizeof(struct v2_disk_dqinfo)) { 107 if (size != sizeof(struct v2_disk_dqinfo)) {
109 q_warn(KERN_WARNING "quota_v2: Can't read info structure on device %s.\n", 108 quota_error(sb, "Can't read info structure");
110 sb->s_id);
111 return -1; 109 return -1;
112 } 110 }
113 info->dqi_priv = kmalloc(sizeof(struct qtree_mem_dqinfo), GFP_NOFS); 111 info->dqi_priv = kmalloc(sizeof(struct qtree_mem_dqinfo), GFP_NOFS);
@@ -167,8 +165,7 @@ static int v2_write_file_info(struct super_block *sb, int type)
167 size = sb->s_op->quota_write(sb, type, (char *)&dinfo, 165 size = sb->s_op->quota_write(sb, type, (char *)&dinfo,
168 sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); 166 sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
169 if (size != sizeof(struct v2_disk_dqinfo)) { 167 if (size != sizeof(struct v2_disk_dqinfo)) {
170 q_warn(KERN_WARNING "Can't write info structure on device %s.\n", 168 quota_error(sb, "Can't write info structure");
171 sb->s_id);
172 return -1; 169 return -1;
173 } 170 }
174 return 0; 171 return 0;
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index d532c20fc179..9eead2c796b7 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -146,9 +146,8 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
146 return ret; 146 return ret;
147 } 147 }
148 148
149 ret = simple_setsize(inode, newsize); 149 truncate_setsize(inode, newsize);
150 150 return 0;
151 return ret;
152} 151}
153 152
154/*****************************************************************************/ 153/*****************************************************************************/
@@ -183,7 +182,7 @@ static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia)
183 } 182 }
184 } 183 }
185 184
186 generic_setattr(inode, ia); 185 setattr_copy(inode, ia);
187 out: 186 out:
188 ia->ia_valid = old_ia_valid; 187 ia->ia_valid = old_ia_valid;
189 return ret; 188 return ret;
diff --git a/fs/readdir.c b/fs/readdir.c
index 7723401f8d8b..356f71528ad6 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -4,6 +4,7 @@
4 * Copyright (C) 1995 Linus Torvalds 4 * Copyright (C) 1995 Linus Torvalds
5 */ 5 */
6 6
7#include <linux/stddef.h>
7#include <linux/kernel.h> 8#include <linux/kernel.h>
8#include <linux/module.h> 9#include <linux/module.h>
9#include <linux/time.h> 10#include <linux/time.h>
@@ -54,7 +55,6 @@ EXPORT_SYMBOL(vfs_readdir);
54 * anyway. Thus the special "fillonedir()" function for that 55 * anyway. Thus the special "fillonedir()" function for that
55 * case (the low-level handlers don't need to care about this). 56 * case (the low-level handlers don't need to care about this).
56 */ 57 */
57#define NAME_OFFSET(de) ((int) ((de)->d_name - (char __user *) (de)))
58 58
59#ifdef __ARCH_WANT_OLD_READDIR 59#ifdef __ARCH_WANT_OLD_READDIR
60 60
@@ -152,7 +152,8 @@ static int filldir(void * __buf, const char * name, int namlen, loff_t offset,
152 struct linux_dirent __user * dirent; 152 struct linux_dirent __user * dirent;
153 struct getdents_callback * buf = (struct getdents_callback *) __buf; 153 struct getdents_callback * buf = (struct getdents_callback *) __buf;
154 unsigned long d_ino; 154 unsigned long d_ino;
155 int reclen = ALIGN(NAME_OFFSET(dirent) + namlen + 2, sizeof(long)); 155 int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2,
156 sizeof(long));
156 157
157 buf->error = -EINVAL; /* only used if we fail.. */ 158 buf->error = -EINVAL; /* only used if we fail.. */
158 if (reclen > buf->count) 159 if (reclen > buf->count)
@@ -237,7 +238,8 @@ static int filldir64(void * __buf, const char * name, int namlen, loff_t offset,
237{ 238{
238 struct linux_dirent64 __user *dirent; 239 struct linux_dirent64 __user *dirent;
239 struct getdents_callback64 * buf = (struct getdents_callback64 *) __buf; 240 struct getdents_callback64 * buf = (struct getdents_callback64 *) __buf;
240 int reclen = ALIGN(NAME_OFFSET(dirent) + namlen + 1, sizeof(u64)); 241 int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1,
242 sizeof(u64));
241 243
242 buf->error = -EINVAL; /* only used if we fail.. */ 244 buf->error = -EINVAL; /* only used if we fail.. */
243 if (reclen > buf->count) 245 if (reclen > buf->count)
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index b82cdd8a45dd..6846371498b6 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -38,20 +38,24 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
38 38
39 BUG_ON(!S_ISREG(inode->i_mode)); 39 BUG_ON(!S_ISREG(inode->i_mode));
40 40
41 if (atomic_add_unless(&REISERFS_I(inode)->openers, -1, 1))
42 return 0;
43
44 mutex_lock(&(REISERFS_I(inode)->tailpack));
45
46 if (!atomic_dec_and_test(&REISERFS_I(inode)->openers)) {
47 mutex_unlock(&(REISERFS_I(inode)->tailpack));
48 return 0;
49 }
50
41 /* fast out for when nothing needs to be done */ 51 /* fast out for when nothing needs to be done */
42 if ((atomic_read(&inode->i_count) > 1 || 52 if ((!(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) ||
43 !(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) ||
44 !tail_has_to_be_packed(inode)) && 53 !tail_has_to_be_packed(inode)) &&
45 REISERFS_I(inode)->i_prealloc_count <= 0) { 54 REISERFS_I(inode)->i_prealloc_count <= 0) {
55 mutex_unlock(&(REISERFS_I(inode)->tailpack));
46 return 0; 56 return 0;
47 } 57 }
48 58
49 mutex_lock(&inode->i_mutex);
50
51 mutex_lock(&(REISERFS_I(inode)->i_mmap));
52 if (REISERFS_I(inode)->i_flags & i_ever_mapped)
53 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
54
55 reiserfs_write_lock(inode->i_sb); 59 reiserfs_write_lock(inode->i_sb);
56 /* freeing preallocation only involves relogging blocks that 60 /* freeing preallocation only involves relogging blocks that
57 * are already in the current transaction. preallocation gets 61 * are already in the current transaction. preallocation gets
@@ -94,9 +98,10 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
94 if (!err) 98 if (!err)
95 err = jbegin_failure; 99 err = jbegin_failure;
96 100
97 if (!err && atomic_read(&inode->i_count) <= 1 && 101 if (!err &&
98 (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) && 102 (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) &&
99 tail_has_to_be_packed(inode)) { 103 tail_has_to_be_packed(inode)) {
104
100 /* if regular file is released by last holder and it has been 105 /* if regular file is released by last holder and it has been
101 appended (we append by unformatted node only) or its direct 106 appended (we append by unformatted node only) or its direct
102 item(s) had to be converted, then it may have to be 107 item(s) had to be converted, then it may have to be
@@ -104,27 +109,28 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
104 err = reiserfs_truncate_file(inode, 0); 109 err = reiserfs_truncate_file(inode, 0);
105 } 110 }
106 out: 111 out:
107 mutex_unlock(&(REISERFS_I(inode)->i_mmap));
108 mutex_unlock(&inode->i_mutex);
109 reiserfs_write_unlock(inode->i_sb); 112 reiserfs_write_unlock(inode->i_sb);
113 mutex_unlock(&(REISERFS_I(inode)->tailpack));
110 return err; 114 return err;
111} 115}
112 116
113static int reiserfs_file_mmap(struct file *file, struct vm_area_struct *vma) 117static int reiserfs_file_open(struct inode *inode, struct file *file)
114{ 118{
115 struct inode *inode; 119 int err = dquot_file_open(inode, file);
116 120 if (!atomic_inc_not_zero(&REISERFS_I(inode)->openers)) {
117 inode = file->f_path.dentry->d_inode; 121 /* somebody might be tailpacking on final close; wait for it */
118 mutex_lock(&(REISERFS_I(inode)->i_mmap)); 122 mutex_lock(&(REISERFS_I(inode)->tailpack));
119 REISERFS_I(inode)->i_flags |= i_ever_mapped; 123 atomic_inc(&REISERFS_I(inode)->openers);
120 mutex_unlock(&(REISERFS_I(inode)->i_mmap)); 124 mutex_unlock(&(REISERFS_I(inode)->tailpack));
121 125 }
122 return generic_file_mmap(file, vma); 126 return err;
123} 127}
124 128
125static void reiserfs_vfs_truncate_file(struct inode *inode) 129static void reiserfs_vfs_truncate_file(struct inode *inode)
126{ 130{
131 mutex_lock(&(REISERFS_I(inode)->tailpack));
127 reiserfs_truncate_file(inode, 1); 132 reiserfs_truncate_file(inode, 1);
133 mutex_unlock(&(REISERFS_I(inode)->tailpack));
128} 134}
129 135
130/* Sync a reiserfs file. */ 136/* Sync a reiserfs file. */
@@ -288,8 +294,8 @@ const struct file_operations reiserfs_file_operations = {
288#ifdef CONFIG_COMPAT 294#ifdef CONFIG_COMPAT
289 .compat_ioctl = reiserfs_compat_ioctl, 295 .compat_ioctl = reiserfs_compat_ioctl,
290#endif 296#endif
291 .mmap = reiserfs_file_mmap, 297 .mmap = generic_file_mmap,
292 .open = dquot_file_open, 298 .open = reiserfs_file_open,
293 .release = reiserfs_file_release, 299 .release = reiserfs_file_release,
294 .fsync = reiserfs_sync_file, 300 .fsync = reiserfs_sync_file,
295 .aio_read = generic_file_aio_read, 301 .aio_read = generic_file_aio_read,
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 0f22fdaf54ac..ae35413dcbe1 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -25,7 +25,7 @@ int reiserfs_commit_write(struct file *f, struct page *page,
25int reiserfs_prepare_write(struct file *f, struct page *page, 25int reiserfs_prepare_write(struct file *f, struct page *page,
26 unsigned from, unsigned to); 26 unsigned from, unsigned to);
27 27
28void reiserfs_delete_inode(struct inode *inode) 28void reiserfs_evict_inode(struct inode *inode)
29{ 29{
30 /* We need blocks for transaction + (user+group) quota update (possibly delete) */ 30 /* We need blocks for transaction + (user+group) quota update (possibly delete) */
31 int jbegin_count = 31 int jbegin_count =
@@ -35,10 +35,12 @@ void reiserfs_delete_inode(struct inode *inode)
35 int depth; 35 int depth;
36 int err; 36 int err;
37 37
38 if (!is_bad_inode(inode)) 38 if (!inode->i_nlink && !is_bad_inode(inode))
39 dquot_initialize(inode); 39 dquot_initialize(inode);
40 40
41 truncate_inode_pages(&inode->i_data, 0); 41 truncate_inode_pages(&inode->i_data, 0);
42 if (inode->i_nlink)
43 goto no_delete;
42 44
43 depth = reiserfs_write_lock_once(inode->i_sb); 45 depth = reiserfs_write_lock_once(inode->i_sb);
44 46
@@ -77,9 +79,14 @@ void reiserfs_delete_inode(struct inode *inode)
77 ; 79 ;
78 } 80 }
79 out: 81 out:
80 clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */ 82 end_writeback(inode); /* note this must go after the journal_end to prevent deadlock */
83 dquot_drop(inode);
81 inode->i_blocks = 0; 84 inode->i_blocks = 0;
82 reiserfs_write_unlock_once(inode->i_sb, depth); 85 reiserfs_write_unlock_once(inode->i_sb, depth);
86
87no_delete:
88 end_writeback(inode);
89 dquot_drop(inode);
83} 90}
84 91
85static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid, 92static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid,
@@ -1138,7 +1145,6 @@ static void init_inode(struct inode *inode, struct treepath *path)
1138 REISERFS_I(inode)->i_prealloc_count = 0; 1145 REISERFS_I(inode)->i_prealloc_count = 0;
1139 REISERFS_I(inode)->i_trans_id = 0; 1146 REISERFS_I(inode)->i_trans_id = 0;
1140 REISERFS_I(inode)->i_jl = NULL; 1147 REISERFS_I(inode)->i_jl = NULL;
1141 mutex_init(&(REISERFS_I(inode)->i_mmap));
1142 reiserfs_init_xattr_rwsem(inode); 1148 reiserfs_init_xattr_rwsem(inode);
1143 1149
1144 if (stat_data_v1(ih)) { 1150 if (stat_data_v1(ih)) {
@@ -1221,7 +1227,7 @@ static void init_inode(struct inode *inode, struct treepath *path)
1221 inode_set_bytes(inode, 1227 inode_set_bytes(inode,
1222 to_real_used_space(inode, inode->i_blocks, 1228 to_real_used_space(inode, inode->i_blocks,
1223 SD_V2_SIZE)); 1229 SD_V2_SIZE));
1224 /* read persistent inode attributes from sd and initalise 1230 /* read persistent inode attributes from sd and initialise
1225 generic inode flags from them */ 1231 generic inode flags from them */
1226 REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd); 1232 REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd);
1227 sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode); 1233 sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode);
@@ -1841,7 +1847,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1841 REISERFS_I(inode)->i_attrs = 1847 REISERFS_I(inode)->i_attrs =
1842 REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK; 1848 REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
1843 sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode); 1849 sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode);
1844 mutex_init(&(REISERFS_I(inode)->i_mmap));
1845 reiserfs_init_xattr_rwsem(inode); 1850 reiserfs_init_xattr_rwsem(inode);
1846 1851
1847 /* key to search for correct place for new stat data */ 1852 /* key to search for correct place for new stat data */
@@ -2587,8 +2592,7 @@ static int reiserfs_write_begin(struct file *file,
2587 old_ref = th->t_refcount; 2592 old_ref = th->t_refcount;
2588 th->t_refcount++; 2593 th->t_refcount++;
2589 } 2594 }
2590 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 2595 ret = __block_write_begin(page, pos, len, reiserfs_get_block);
2591 reiserfs_get_block);
2592 if (ret && reiserfs_transaction_running(inode->i_sb)) { 2596 if (ret && reiserfs_transaction_running(inode->i_sb)) {
2593 struct reiserfs_transaction_handle *th = current->journal_info; 2597 struct reiserfs_transaction_handle *th = current->journal_info;
2594 /* this gets a little ugly. If reiserfs_get_block returned an 2598 /* this gets a little ugly. If reiserfs_get_block returned an
@@ -3059,10 +3063,25 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
3059{ 3063{
3060 struct file *file = iocb->ki_filp; 3064 struct file *file = iocb->ki_filp;
3061 struct inode *inode = file->f_mapping->host; 3065 struct inode *inode = file->f_mapping->host;
3066 ssize_t ret;
3062 3067
3063 return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 3068 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
3064 offset, nr_segs, 3069 offset, nr_segs,
3065 reiserfs_get_blocks_direct_io, NULL); 3070 reiserfs_get_blocks_direct_io, NULL);
3071
3072 /*
3073 * In case of error extending write may have instantiated a few
3074 * blocks outside i_size. Trim these off again.
3075 */
3076 if (unlikely((rw & WRITE) && ret < 0)) {
3077 loff_t isize = i_size_read(inode);
3078 loff_t end = offset + iov_length(iov, nr_segs);
3079
3080 if (end > isize)
3081 vmtruncate(inode, isize);
3082 }
3083
3084 return ret;
3066} 3085}
3067 3086
3068int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) 3087int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
@@ -3072,6 +3091,10 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3072 int depth; 3091 int depth;
3073 int error; 3092 int error;
3074 3093
3094 error = inode_change_ok(inode, attr);
3095 if (error)
3096 return error;
3097
3075 /* must be turned off for recursive notify_change calls */ 3098 /* must be turned off for recursive notify_change calls */
3076 ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID); 3099 ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
3077 3100
@@ -3121,55 +3144,58 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3121 goto out; 3144 goto out;
3122 } 3145 }
3123 3146
3124 error = inode_change_ok(inode, attr); 3147 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
3125 if (!error) { 3148 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
3126 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || 3149 struct reiserfs_transaction_handle th;
3127 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { 3150 int jbegin_count =
3128 error = reiserfs_chown_xattrs(inode, attr); 3151 2 *
3152 (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) +
3153 REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) +
3154 2;
3129 3155
3130 if (!error) { 3156 error = reiserfs_chown_xattrs(inode, attr);
3131 struct reiserfs_transaction_handle th; 3157
3132 int jbegin_count = 3158 if (error)
3133 2 * 3159 return error;
3134 (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) + 3160
3135 REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) + 3161 /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */
3136 2; 3162 error = journal_begin(&th, inode->i_sb, jbegin_count);
3137 3163 if (error)
3138 /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */ 3164 goto out;
3139 error = 3165 error = dquot_transfer(inode, attr);
3140 journal_begin(&th, inode->i_sb, 3166 if (error) {
3141 jbegin_count); 3167 journal_end(&th, inode->i_sb, jbegin_count);
3142 if (error) 3168 goto out;
3143 goto out;
3144 error = dquot_transfer(inode, attr);
3145 if (error) {
3146 journal_end(&th, inode->i_sb,
3147 jbegin_count);
3148 goto out;
3149 }
3150 /* Update corresponding info in inode so that everything is in
3151 * one transaction */
3152 if (attr->ia_valid & ATTR_UID)
3153 inode->i_uid = attr->ia_uid;
3154 if (attr->ia_valid & ATTR_GID)
3155 inode->i_gid = attr->ia_gid;
3156 mark_inode_dirty(inode);
3157 error =
3158 journal_end(&th, inode->i_sb, jbegin_count);
3159 }
3160 }
3161 if (!error) {
3162 /*
3163 * Relax the lock here, as it might truncate the
3164 * inode pages and wait for inode pages locks.
3165 * To release such page lock, the owner needs the
3166 * reiserfs lock
3167 */
3168 reiserfs_write_unlock_once(inode->i_sb, depth);
3169 error = inode_setattr(inode, attr);
3170 depth = reiserfs_write_lock_once(inode->i_sb);
3171 } 3169 }
3170
3171 /* Update corresponding info in inode so that everything is in
3172 * one transaction */
3173 if (attr->ia_valid & ATTR_UID)
3174 inode->i_uid = attr->ia_uid;
3175 if (attr->ia_valid & ATTR_GID)
3176 inode->i_gid = attr->ia_gid;
3177 mark_inode_dirty(inode);
3178 error = journal_end(&th, inode->i_sb, jbegin_count);
3179 if (error)
3180 goto out;
3181 }
3182
3183 /*
3184 * Relax the lock here, as it might truncate the
3185 * inode pages and wait for inode pages locks.
3186 * To release such page lock, the owner needs the
3187 * reiserfs lock
3188 */
3189 reiserfs_write_unlock_once(inode->i_sb, depth);
3190 if ((attr->ia_valid & ATTR_SIZE) &&
3191 attr->ia_size != i_size_read(inode))
3192 error = vmtruncate(inode, attr->ia_size);
3193
3194 if (!error) {
3195 setattr_copy(inode, attr);
3196 mark_inode_dirty(inode);
3172 } 3197 }
3198 depth = reiserfs_write_lock_once(inode->i_sb);
3173 3199
3174 if (!error && reiserfs_posixacl(inode->i_sb)) { 3200 if (!error && reiserfs_posixacl(inode->i_sb)) {
3175 if (attr->ia_valid & ATTR_MODE) 3201 if (attr->ia_valid & ATTR_MODE)
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 9822fa15118b..e15ff612002d 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -525,6 +525,8 @@ static struct inode *reiserfs_alloc_inode(struct super_block *sb)
525 kmem_cache_alloc(reiserfs_inode_cachep, GFP_KERNEL); 525 kmem_cache_alloc(reiserfs_inode_cachep, GFP_KERNEL);
526 if (!ei) 526 if (!ei)
527 return NULL; 527 return NULL;
528 atomic_set(&ei->openers, 0);
529 mutex_init(&ei->tailpack);
528 return &ei->vfs_inode; 530 return &ei->vfs_inode;
529} 531}
530 532
@@ -589,11 +591,6 @@ out:
589 reiserfs_write_unlock_once(inode->i_sb, lock_depth); 591 reiserfs_write_unlock_once(inode->i_sb, lock_depth);
590} 592}
591 593
592static void reiserfs_clear_inode(struct inode *inode)
593{
594 dquot_drop(inode);
595}
596
597#ifdef CONFIG_QUOTA 594#ifdef CONFIG_QUOTA
598static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, 595static ssize_t reiserfs_quota_write(struct super_block *, int, const char *,
599 size_t, loff_t); 596 size_t, loff_t);
@@ -606,8 +603,7 @@ static const struct super_operations reiserfs_sops = {
606 .destroy_inode = reiserfs_destroy_inode, 603 .destroy_inode = reiserfs_destroy_inode,
607 .write_inode = reiserfs_write_inode, 604 .write_inode = reiserfs_write_inode,
608 .dirty_inode = reiserfs_dirty_inode, 605 .dirty_inode = reiserfs_dirty_inode,
609 .clear_inode = reiserfs_clear_inode, 606 .evict_inode = reiserfs_evict_inode,
610 .delete_inode = reiserfs_delete_inode,
611 .put_super = reiserfs_put_super, 607 .put_super = reiserfs_put_super,
612 .write_super = reiserfs_write_super, 608 .write_super = reiserfs_write_super,
613 .sync_fs = reiserfs_sync_fs, 609 .sync_fs = reiserfs_sync_fs,
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 9551cb6f7fe4..450c91941988 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -46,7 +46,7 @@
46 46
47#define SMB_TTL_DEFAULT 1000 47#define SMB_TTL_DEFAULT 1000
48 48
49static void smb_delete_inode(struct inode *); 49static void smb_evict_inode(struct inode *);
50static void smb_put_super(struct super_block *); 50static void smb_put_super(struct super_block *);
51static int smb_statfs(struct dentry *, struct kstatfs *); 51static int smb_statfs(struct dentry *, struct kstatfs *);
52static int smb_show_options(struct seq_file *, struct vfsmount *); 52static int smb_show_options(struct seq_file *, struct vfsmount *);
@@ -102,7 +102,7 @@ static const struct super_operations smb_sops =
102 .alloc_inode = smb_alloc_inode, 102 .alloc_inode = smb_alloc_inode,
103 .destroy_inode = smb_destroy_inode, 103 .destroy_inode = smb_destroy_inode,
104 .drop_inode = generic_delete_inode, 104 .drop_inode = generic_delete_inode,
105 .delete_inode = smb_delete_inode, 105 .evict_inode = smb_evict_inode,
106 .put_super = smb_put_super, 106 .put_super = smb_put_super,
107 .statfs = smb_statfs, 107 .statfs = smb_statfs,
108 .show_options = smb_show_options, 108 .show_options = smb_show_options,
@@ -324,15 +324,15 @@ out:
324 * All blocking cleanup operations need to go here to avoid races. 324 * All blocking cleanup operations need to go here to avoid races.
325 */ 325 */
326static void 326static void
327smb_delete_inode(struct inode *ino) 327smb_evict_inode(struct inode *ino)
328{ 328{
329 DEBUG1("ino=%ld\n", ino->i_ino); 329 DEBUG1("ino=%ld\n", ino->i_ino);
330 truncate_inode_pages(&ino->i_data, 0); 330 truncate_inode_pages(&ino->i_data, 0);
331 end_writeback(ino);
331 lock_kernel(); 332 lock_kernel();
332 if (smb_close(ino)) 333 if (smb_close(ino))
333 PARANOIA("could not close inode %ld\n", ino->i_ino); 334 PARANOIA("could not close inode %ld\n", ino->i_ino);
334 unlock_kernel(); 335 unlock_kernel();
335 clear_inode(ino);
336} 336}
337 337
338static struct option opts[] = { 338static struct option opts[] = {
@@ -714,9 +714,7 @@ smb_notify_change(struct dentry *dentry, struct iattr *attr)
714 error = server->ops->truncate(inode, attr->ia_size); 714 error = server->ops->truncate(inode, attr->ia_size);
715 if (error) 715 if (error)
716 goto out; 716 goto out;
717 error = simple_setsize(inode, attr->ia_size); 717 truncate_setsize(inode, attr->ia_size);
718 if (error)
719 goto out;
720 refresh = 1; 718 refresh = 1;
721 } 719 }
722 720
diff --git a/fs/statfs.c b/fs/statfs.c
index 4ef021f3b612..30ea8c8a996b 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -2,38 +2,83 @@
2#include <linux/module.h> 2#include <linux/module.h>
3#include <linux/fs.h> 3#include <linux/fs.h>
4#include <linux/file.h> 4#include <linux/file.h>
5#include <linux/mount.h>
5#include <linux/namei.h> 6#include <linux/namei.h>
6#include <linux/statfs.h> 7#include <linux/statfs.h>
7#include <linux/security.h> 8#include <linux/security.h>
8#include <linux/uaccess.h> 9#include <linux/uaccess.h>
9 10
10int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) 11static int flags_by_mnt(int mnt_flags)
11{ 12{
12 int retval = -ENODEV; 13 int flags = 0;
13 14
14 if (dentry) { 15 if (mnt_flags & MNT_READONLY)
15 retval = -ENOSYS; 16 flags |= ST_RDONLY;
16 if (dentry->d_sb->s_op->statfs) { 17 if (mnt_flags & MNT_NOSUID)
17 memset(buf, 0, sizeof(*buf)); 18 flags |= ST_NOSUID;
18 retval = security_sb_statfs(dentry); 19 if (mnt_flags & MNT_NODEV)
19 if (retval) 20 flags |= ST_NODEV;
20 return retval; 21 if (mnt_flags & MNT_NOEXEC)
21 retval = dentry->d_sb->s_op->statfs(dentry, buf); 22 flags |= ST_NOEXEC;
22 if (retval == 0 && buf->f_frsize == 0) 23 if (mnt_flags & MNT_NOATIME)
23 buf->f_frsize = buf->f_bsize; 24 flags |= ST_NOATIME;
24 } 25 if (mnt_flags & MNT_NODIRATIME)
25 } 26 flags |= ST_NODIRATIME;
27 if (mnt_flags & MNT_RELATIME)
28 flags |= ST_RELATIME;
29 return flags;
30}
31
32static int flags_by_sb(int s_flags)
33{
34 int flags = 0;
35 if (s_flags & MS_SYNCHRONOUS)
36 flags |= ST_SYNCHRONOUS;
37 if (s_flags & MS_MANDLOCK)
38 flags |= ST_MANDLOCK;
39 return flags;
40}
41
42static int calculate_f_flags(struct vfsmount *mnt)
43{
44 return ST_VALID | flags_by_mnt(mnt->mnt_flags) |
45 flags_by_sb(mnt->mnt_sb->s_flags);
46}
47
48int statfs_by_dentry(struct dentry *dentry, struct kstatfs *buf)
49{
50 int retval;
51
52 if (!dentry->d_sb->s_op->statfs)
53 return -ENOSYS;
54
55 memset(buf, 0, sizeof(*buf));
56 retval = security_sb_statfs(dentry);
57 if (retval)
58 return retval;
59 retval = dentry->d_sb->s_op->statfs(dentry, buf);
60 if (retval == 0 && buf->f_frsize == 0)
61 buf->f_frsize = buf->f_bsize;
26 return retval; 62 return retval;
27} 63}
28 64
65int vfs_statfs(struct path *path, struct kstatfs *buf)
66{
67 int error;
68
69 error = statfs_by_dentry(path->dentry, buf);
70 if (!error)
71 buf->f_flags = calculate_f_flags(path->mnt);
72 return error;
73}
29EXPORT_SYMBOL(vfs_statfs); 74EXPORT_SYMBOL(vfs_statfs);
30 75
31static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf) 76static int do_statfs_native(struct path *path, struct statfs *buf)
32{ 77{
33 struct kstatfs st; 78 struct kstatfs st;
34 int retval; 79 int retval;
35 80
36 retval = vfs_statfs(dentry, &st); 81 retval = vfs_statfs(path, &st);
37 if (retval) 82 if (retval)
38 return retval; 83 return retval;
39 84
@@ -67,17 +112,18 @@ static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf)
67 buf->f_fsid = st.f_fsid; 112 buf->f_fsid = st.f_fsid;
68 buf->f_namelen = st.f_namelen; 113 buf->f_namelen = st.f_namelen;
69 buf->f_frsize = st.f_frsize; 114 buf->f_frsize = st.f_frsize;
115 buf->f_flags = st.f_flags;
70 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 116 memset(buf->f_spare, 0, sizeof(buf->f_spare));
71 } 117 }
72 return 0; 118 return 0;
73} 119}
74 120
75static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf) 121static int do_statfs64(struct path *path, struct statfs64 *buf)
76{ 122{
77 struct kstatfs st; 123 struct kstatfs st;
78 int retval; 124 int retval;
79 125
80 retval = vfs_statfs(dentry, &st); 126 retval = vfs_statfs(path, &st);
81 if (retval) 127 if (retval)
82 return retval; 128 return retval;
83 129
@@ -94,6 +140,7 @@ static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf)
94 buf->f_fsid = st.f_fsid; 140 buf->f_fsid = st.f_fsid;
95 buf->f_namelen = st.f_namelen; 141 buf->f_namelen = st.f_namelen;
96 buf->f_frsize = st.f_frsize; 142 buf->f_frsize = st.f_frsize;
143 buf->f_flags = st.f_flags;
97 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 144 memset(buf->f_spare, 0, sizeof(buf->f_spare));
98 } 145 }
99 return 0; 146 return 0;
@@ -107,7 +154,7 @@ SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, b
107 error = user_path(pathname, &path); 154 error = user_path(pathname, &path);
108 if (!error) { 155 if (!error) {
109 struct statfs tmp; 156 struct statfs tmp;
110 error = vfs_statfs_native(path.dentry, &tmp); 157 error = do_statfs_native(&path, &tmp);
111 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 158 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
112 error = -EFAULT; 159 error = -EFAULT;
113 path_put(&path); 160 path_put(&path);
@@ -125,7 +172,7 @@ SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct stat
125 error = user_path(pathname, &path); 172 error = user_path(pathname, &path);
126 if (!error) { 173 if (!error) {
127 struct statfs64 tmp; 174 struct statfs64 tmp;
128 error = vfs_statfs64(path.dentry, &tmp); 175 error = do_statfs64(&path, &tmp);
129 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 176 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
130 error = -EFAULT; 177 error = -EFAULT;
131 path_put(&path); 178 path_put(&path);
@@ -143,7 +190,7 @@ SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf)
143 file = fget(fd); 190 file = fget(fd);
144 if (!file) 191 if (!file)
145 goto out; 192 goto out;
146 error = vfs_statfs_native(file->f_path.dentry, &tmp); 193 error = do_statfs_native(&file->f_path, &tmp);
147 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 194 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
148 error = -EFAULT; 195 error = -EFAULT;
149 fput(file); 196 fput(file);
@@ -164,7 +211,7 @@ SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user
164 file = fget(fd); 211 file = fget(fd);
165 if (!file) 212 if (!file)
166 goto out; 213 goto out;
167 error = vfs_statfs64(file->f_path.dentry, &tmp); 214 error = do_statfs64(&file->f_path, &tmp);
168 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 215 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
169 error = -EFAULT; 216 error = -EFAULT;
170 fput(file); 217 fput(file);
@@ -183,7 +230,7 @@ SYSCALL_DEFINE2(ustat, unsigned, dev, struct ustat __user *, ubuf)
183 if (!s) 230 if (!s)
184 return -EINVAL; 231 return -EINVAL;
185 232
186 err = vfs_statfs(s->s_root, &sbuf); 233 err = statfs_by_dentry(s->s_root, &sbuf);
187 drop_super(s); 234 drop_super(s);
188 if (err) 235 if (err)
189 return err; 236 return err;
diff --git a/fs/super.c b/fs/super.c
index 938119ab8dcb..9674ab2c8718 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -305,8 +305,13 @@ retry:
305 if (s) { 305 if (s) {
306 up_write(&s->s_umount); 306 up_write(&s->s_umount);
307 destroy_super(s); 307 destroy_super(s);
308 s = NULL;
308 } 309 }
309 down_write(&old->s_umount); 310 down_write(&old->s_umount);
311 if (unlikely(!(old->s_flags & MS_BORN))) {
312 deactivate_locked_super(old);
313 goto retry;
314 }
310 return old; 315 return old;
311 } 316 }
312 } 317 }
@@ -358,10 +363,10 @@ EXPORT_SYMBOL(drop_super);
358 */ 363 */
359void sync_supers(void) 364void sync_supers(void)
360{ 365{
361 struct super_block *sb, *n; 366 struct super_block *sb, *p = NULL;
362 367
363 spin_lock(&sb_lock); 368 spin_lock(&sb_lock);
364 list_for_each_entry_safe(sb, n, &super_blocks, s_list) { 369 list_for_each_entry(sb, &super_blocks, s_list) {
365 if (list_empty(&sb->s_instances)) 370 if (list_empty(&sb->s_instances))
366 continue; 371 continue;
367 if (sb->s_op->write_super && sb->s_dirt) { 372 if (sb->s_op->write_super && sb->s_dirt) {
@@ -374,11 +379,13 @@ void sync_supers(void)
374 up_read(&sb->s_umount); 379 up_read(&sb->s_umount);
375 380
376 spin_lock(&sb_lock); 381 spin_lock(&sb_lock);
377 /* lock was dropped, must reset next */ 382 if (p)
378 list_safe_reset_next(sb, n, s_list); 383 __put_super(p);
379 __put_super(sb); 384 p = sb;
380 } 385 }
381 } 386 }
387 if (p)
388 __put_super(p);
382 spin_unlock(&sb_lock); 389 spin_unlock(&sb_lock);
383} 390}
384 391
@@ -392,10 +399,10 @@ void sync_supers(void)
392 */ 399 */
393void iterate_supers(void (*f)(struct super_block *, void *), void *arg) 400void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
394{ 401{
395 struct super_block *sb, *n; 402 struct super_block *sb, *p = NULL;
396 403
397 spin_lock(&sb_lock); 404 spin_lock(&sb_lock);
398 list_for_each_entry_safe(sb, n, &super_blocks, s_list) { 405 list_for_each_entry(sb, &super_blocks, s_list) {
399 if (list_empty(&sb->s_instances)) 406 if (list_empty(&sb->s_instances))
400 continue; 407 continue;
401 sb->s_count++; 408 sb->s_count++;
@@ -407,10 +414,12 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
407 up_read(&sb->s_umount); 414 up_read(&sb->s_umount);
408 415
409 spin_lock(&sb_lock); 416 spin_lock(&sb_lock);
410 /* lock was dropped, must reset next */ 417 if (p)
411 list_safe_reset_next(sb, n, s_list); 418 __put_super(p);
412 __put_super(sb); 419 p = sb;
413 } 420 }
421 if (p)
422 __put_super(p);
414 spin_unlock(&sb_lock); 423 spin_unlock(&sb_lock);
415} 424}
416 425
@@ -572,10 +581,10 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
572 581
573static void do_emergency_remount(struct work_struct *work) 582static void do_emergency_remount(struct work_struct *work)
574{ 583{
575 struct super_block *sb, *n; 584 struct super_block *sb, *p = NULL;
576 585
577 spin_lock(&sb_lock); 586 spin_lock(&sb_lock);
578 list_for_each_entry_safe(sb, n, &super_blocks, s_list) { 587 list_for_each_entry(sb, &super_blocks, s_list) {
579 if (list_empty(&sb->s_instances)) 588 if (list_empty(&sb->s_instances))
580 continue; 589 continue;
581 sb->s_count++; 590 sb->s_count++;
@@ -589,10 +598,12 @@ static void do_emergency_remount(struct work_struct *work)
589 } 598 }
590 up_write(&sb->s_umount); 599 up_write(&sb->s_umount);
591 spin_lock(&sb_lock); 600 spin_lock(&sb_lock);
592 /* lock was dropped, must reset next */ 601 if (p)
593 list_safe_reset_next(sb, n, s_list); 602 __put_super(p);
594 __put_super(sb); 603 p = sb;
595 } 604 }
605 if (p)
606 __put_super(p);
596 spin_unlock(&sb_lock); 607 spin_unlock(&sb_lock);
597 kfree(work); 608 kfree(work);
598 printk("Emergency Remount complete\n"); 609 printk("Emergency Remount complete\n");
@@ -773,7 +784,16 @@ int get_sb_bdev(struct file_system_type *fs_type,
773 goto error_bdev; 784 goto error_bdev;
774 } 785 }
775 786
787 /*
788 * s_umount nests inside bd_mutex during
789 * __invalidate_device(). close_bdev_exclusive()
790 * acquires bd_mutex and can't be called under
791 * s_umount. Drop s_umount temporarily. This is safe
792 * as we're holding an active reference.
793 */
794 up_write(&s->s_umount);
776 close_bdev_exclusive(bdev, mode); 795 close_bdev_exclusive(bdev, mode);
796 down_write(&s->s_umount);
777 } else { 797 } else {
778 char b[BDEVNAME_SIZE]; 798 char b[BDEVNAME_SIZE];
779 799
@@ -909,6 +929,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
909 goto out_free_secdata; 929 goto out_free_secdata;
910 BUG_ON(!mnt->mnt_sb); 930 BUG_ON(!mnt->mnt_sb);
911 WARN_ON(!mnt->mnt_sb->s_bdi); 931 WARN_ON(!mnt->mnt_sb->s_bdi);
932 mnt->mnt_sb->s_flags |= MS_BORN;
912 933
913 error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata); 934 error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);
914 if (error) 935 if (error)
diff --git a/fs/sync.c b/fs/sync.c
index 15aa6f03b2da..ba76b9623e7e 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -128,31 +128,6 @@ void emergency_sync(void)
128 } 128 }
129} 129}
130 130
131/*
132 * Generic function to fsync a file.
133 */
134int file_fsync(struct file *filp, int datasync)
135{
136 struct inode *inode = filp->f_mapping->host;
137 struct super_block * sb;
138 int ret, err;
139
140 /* sync the inode to buffers */
141 ret = write_inode_now(inode, 0);
142
143 /* sync the superblock to buffers */
144 sb = inode->i_sb;
145 if (sb->s_dirt && sb->s_op->write_super)
146 sb->s_op->write_super(sb);
147
148 /* .. finally sync the buffers to disk */
149 err = sync_blockdev(sb->s_bdev);
150 if (!ret)
151 ret = err;
152 return ret;
153}
154EXPORT_SYMBOL(file_fsync);
155
156/** 131/**
157 * vfs_fsync_range - helper to sync a range of data & metadata to disk 132 * vfs_fsync_range - helper to sync a range of data & metadata to disk
158 * @file: file to sync 133 * @file: file to sync
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 1beaa739d0a6..1b27b5688f62 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -593,7 +593,8 @@ EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);
593 * @mode: file permissions. 593 * @mode: file permissions.
594 * 594 *
595 */ 595 */
596int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode) 596int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr,
597 mode_t mode)
597{ 598{
598 struct sysfs_dirent *sd; 599 struct sysfs_dirent *sd;
599 struct iattr newattrs; 600 struct iattr newattrs;
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 0835a3b70e03..cffb1fd8ba33 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -122,7 +122,7 @@ int sysfs_setattr(struct dentry *dentry, struct iattr *iattr)
122 goto out; 122 goto out;
123 123
124 /* this ignores size changes */ 124 /* this ignores size changes */
125 generic_setattr(inode, iattr); 125 setattr_copy(inode, iattr);
126 126
127out: 127out:
128 mutex_unlock(&sysfs_mutex); 128 mutex_unlock(&sysfs_mutex);
@@ -312,15 +312,15 @@ struct inode * sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd)
312 * The sysfs_dirent serves as both an inode and a directory entry for sysfs. 312 * The sysfs_dirent serves as both an inode and a directory entry for sysfs.
313 * To prevent the sysfs inode numbers from being freed prematurely we take a 313 * To prevent the sysfs inode numbers from being freed prematurely we take a
314 * reference to sysfs_dirent from the sysfs inode. A 314 * reference to sysfs_dirent from the sysfs inode. A
315 * super_operations.delete_inode() implementation is needed to drop that 315 * super_operations.evict_inode() implementation is needed to drop that
316 * reference upon inode destruction. 316 * reference upon inode destruction.
317 */ 317 */
318void sysfs_delete_inode(struct inode *inode) 318void sysfs_evict_inode(struct inode *inode)
319{ 319{
320 struct sysfs_dirent *sd = inode->i_private; 320 struct sysfs_dirent *sd = inode->i_private;
321 321
322 truncate_inode_pages(&inode->i_data, 0); 322 truncate_inode_pages(&inode->i_data, 0);
323 clear_inode(inode); 323 end_writeback(inode);
324 sysfs_put(sd); 324 sysfs_put(sd);
325} 325}
326 326
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 281c0c9bc39f..f2af22574c50 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -29,7 +29,7 @@ struct kmem_cache *sysfs_dir_cachep;
29static const struct super_operations sysfs_ops = { 29static const struct super_operations sysfs_ops = {
30 .statfs = simple_statfs, 30 .statfs = simple_statfs,
31 .drop_inode = generic_delete_inode, 31 .drop_inode = generic_delete_inode,
32 .delete_inode = sysfs_delete_inode, 32 .evict_inode = sysfs_evict_inode,
33}; 33};
34 34
35struct sysfs_dirent sysfs_root = { 35struct sysfs_dirent sysfs_root = {
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 6a13105b5594..d9be60a2e956 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -198,7 +198,7 @@ static inline void __sysfs_put(struct sysfs_dirent *sd)
198 * inode.c 198 * inode.c
199 */ 199 */
200struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd); 200struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd);
201void sysfs_delete_inode(struct inode *inode); 201void sysfs_evict_inode(struct inode *inode);
202int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr); 202int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr);
203int sysfs_permission(struct inode *inode, int mask); 203int sysfs_permission(struct inode *inode, int mask);
204int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); 204int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index 79941e4964a4..a77c42157620 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -218,8 +218,7 @@ got_it:
218 pos = page_offset(page) + 218 pos = page_offset(page) +
219 (char*)de - (char*)page_address(page); 219 (char*)de - (char*)page_address(page);
220 lock_page(page); 220 lock_page(page);
221 err = __sysv_write_begin(NULL, page->mapping, pos, SYSV_DIRSIZE, 221 err = sysv_prepare_chunk(page, pos, SYSV_DIRSIZE);
222 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
223 if (err) 222 if (err)
224 goto out_unlock; 223 goto out_unlock;
225 memcpy (de->name, name, namelen); 224 memcpy (de->name, name, namelen);
@@ -239,15 +238,13 @@ out_unlock:
239 238
240int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page) 239int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page)
241{ 240{
242 struct address_space *mapping = page->mapping; 241 struct inode *inode = page->mapping->host;
243 struct inode *inode = (struct inode*)mapping->host;
244 char *kaddr = (char*)page_address(page); 242 char *kaddr = (char*)page_address(page);
245 loff_t pos = page_offset(page) + (char *)de - kaddr; 243 loff_t pos = page_offset(page) + (char *)de - kaddr;
246 int err; 244 int err;
247 245
248 lock_page(page); 246 lock_page(page);
249 err = __sysv_write_begin(NULL, mapping, pos, SYSV_DIRSIZE, 247 err = sysv_prepare_chunk(page, pos, SYSV_DIRSIZE);
250 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
251 BUG_ON(err); 248 BUG_ON(err);
252 de->inode = 0; 249 de->inode = 0;
253 err = dir_commit_chunk(page, pos, SYSV_DIRSIZE); 250 err = dir_commit_chunk(page, pos, SYSV_DIRSIZE);
@@ -259,16 +256,14 @@ int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page)
259 256
260int sysv_make_empty(struct inode *inode, struct inode *dir) 257int sysv_make_empty(struct inode *inode, struct inode *dir)
261{ 258{
262 struct address_space *mapping = inode->i_mapping; 259 struct page *page = grab_cache_page(inode->i_mapping, 0);
263 struct page *page = grab_cache_page(mapping, 0);
264 struct sysv_dir_entry * de; 260 struct sysv_dir_entry * de;
265 char *base; 261 char *base;
266 int err; 262 int err;
267 263
268 if (!page) 264 if (!page)
269 return -ENOMEM; 265 return -ENOMEM;
270 err = __sysv_write_begin(NULL, mapping, 0, 2 * SYSV_DIRSIZE, 266 err = sysv_prepare_chunk(page, 0, 2 * SYSV_DIRSIZE);
271 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
272 if (err) { 267 if (err) {
273 unlock_page(page); 268 unlock_page(page);
274 goto fail; 269 goto fail;
@@ -341,15 +336,13 @@ not_empty:
341void sysv_set_link(struct sysv_dir_entry *de, struct page *page, 336void sysv_set_link(struct sysv_dir_entry *de, struct page *page,
342 struct inode *inode) 337 struct inode *inode)
343{ 338{
344 struct address_space *mapping = page->mapping; 339 struct inode *dir = page->mapping->host;
345 struct inode *dir = mapping->host;
346 loff_t pos = page_offset(page) + 340 loff_t pos = page_offset(page) +
347 (char *)de-(char*)page_address(page); 341 (char *)de-(char*)page_address(page);
348 int err; 342 int err;
349 343
350 lock_page(page); 344 lock_page(page);
351 err = __sysv_write_begin(NULL, mapping, pos, SYSV_DIRSIZE, 345 err = sysv_prepare_chunk(page, pos, SYSV_DIRSIZE);
352 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
353 BUG_ON(err); 346 BUG_ON(err);
354 de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino); 347 de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino);
355 err = dir_commit_chunk(page, pos, SYSV_DIRSIZE); 348 err = dir_commit_chunk(page, pos, SYSV_DIRSIZE);
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index 750cc22349bd..0a65939508e9 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -30,7 +30,29 @@ const struct file_operations sysv_file_operations = {
30 .splice_read = generic_file_splice_read, 30 .splice_read = generic_file_splice_read,
31}; 31};
32 32
33static int sysv_setattr(struct dentry *dentry, struct iattr *attr)
34{
35 struct inode *inode = dentry->d_inode;
36 int error;
37
38 error = inode_change_ok(inode, attr);
39 if (error)
40 return error;
41
42 if ((attr->ia_valid & ATTR_SIZE) &&
43 attr->ia_size != i_size_read(inode)) {
44 error = vmtruncate(inode, attr->ia_size);
45 if (error)
46 return error;
47 }
48
49 setattr_copy(inode, attr);
50 mark_inode_dirty(inode);
51 return 0;
52}
53
33const struct inode_operations sysv_file_inode_operations = { 54const struct inode_operations sysv_file_inode_operations = {
34 .truncate = sysv_truncate, 55 .truncate = sysv_truncate,
56 .setattr = sysv_setattr,
35 .getattr = sysv_getattr, 57 .getattr = sysv_getattr,
36}; 58};
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c
index fcc498ec9b33..0c96c98bd1db 100644
--- a/fs/sysv/ialloc.c
+++ b/fs/sysv/ialloc.c
@@ -113,7 +113,6 @@ void sysv_free_inode(struct inode * inode)
113 return; 113 return;
114 } 114 }
115 raw_inode = sysv_raw_inode(sb, ino, &bh); 115 raw_inode = sysv_raw_inode(sb, ino, &bh);
116 clear_inode(inode);
117 if (!raw_inode) { 116 if (!raw_inode) {
118 printk("sysv_free_inode: unable to read inode block on device " 117 printk("sysv_free_inode: unable to read inode block on device "
119 "%s\n", inode->i_sb->s_id); 118 "%s\n", inode->i_sb->s_id);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index d4a5380b5669..de44d067b9e6 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -71,8 +71,8 @@ static int sysv_remount(struct super_block *sb, int *flags, char *data)
71 lock_super(sb); 71 lock_super(sb);
72 if (sbi->s_forced_ro) 72 if (sbi->s_forced_ro)
73 *flags |= MS_RDONLY; 73 *flags |= MS_RDONLY;
74 if (!(*flags & MS_RDONLY)) 74 if (*flags & MS_RDONLY)
75 sb->s_dirt = 1; 75 sysv_write_super(sb);
76 unlock_super(sb); 76 unlock_super(sb);
77 return 0; 77 return 0;
78} 78}
@@ -308,12 +308,17 @@ int sysv_sync_inode(struct inode *inode)
308 return __sysv_write_inode(inode, 1); 308 return __sysv_write_inode(inode, 1);
309} 309}
310 310
311static void sysv_delete_inode(struct inode *inode) 311static void sysv_evict_inode(struct inode *inode)
312{ 312{
313 truncate_inode_pages(&inode->i_data, 0); 313 truncate_inode_pages(&inode->i_data, 0);
314 inode->i_size = 0; 314 if (!inode->i_nlink) {
315 sysv_truncate(inode); 315 inode->i_size = 0;
316 sysv_free_inode(inode); 316 sysv_truncate(inode);
317 }
318 invalidate_inode_buffers(inode);
319 end_writeback(inode);
320 if (!inode->i_nlink)
321 sysv_free_inode(inode);
317} 322}
318 323
319static struct kmem_cache *sysv_inode_cachep; 324static struct kmem_cache *sysv_inode_cachep;
@@ -344,7 +349,7 @@ const struct super_operations sysv_sops = {
344 .alloc_inode = sysv_alloc_inode, 349 .alloc_inode = sysv_alloc_inode,
345 .destroy_inode = sysv_destroy_inode, 350 .destroy_inode = sysv_destroy_inode,
346 .write_inode = sysv_write_inode, 351 .write_inode = sysv_write_inode,
347 .delete_inode = sysv_delete_inode, 352 .evict_inode = sysv_evict_inode,
348 .put_super = sysv_put_super, 353 .put_super = sysv_put_super,
349 .write_super = sysv_write_super, 354 .write_super = sysv_write_super,
350 .sync_fs = sysv_sync_fs, 355 .sync_fs = sysv_sync_fs,
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index f042eec464c2..9ca66276315e 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -459,20 +459,25 @@ static int sysv_readpage(struct file *file, struct page *page)
459 return block_read_full_page(page,get_block); 459 return block_read_full_page(page,get_block);
460} 460}
461 461
462int __sysv_write_begin(struct file *file, struct address_space *mapping, 462int sysv_prepare_chunk(struct page *page, loff_t pos, unsigned len)
463 loff_t pos, unsigned len, unsigned flags,
464 struct page **pagep, void **fsdata)
465{ 463{
466 return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 464 return __block_write_begin(page, pos, len, get_block);
467 get_block);
468} 465}
469 466
470static int sysv_write_begin(struct file *file, struct address_space *mapping, 467static int sysv_write_begin(struct file *file, struct address_space *mapping,
471 loff_t pos, unsigned len, unsigned flags, 468 loff_t pos, unsigned len, unsigned flags,
472 struct page **pagep, void **fsdata) 469 struct page **pagep, void **fsdata)
473{ 470{
474 *pagep = NULL; 471 int ret;
475 return __sysv_write_begin(file, mapping, pos, len, flags, pagep, fsdata); 472
473 ret = block_write_begin(mapping, pos, len, flags, pagep, get_block);
474 if (unlikely(ret)) {
475 loff_t isize = mapping->host->i_size;
476 if (pos + len > isize)
477 vmtruncate(mapping->host, isize);
478 }
479
480 return ret;
476} 481}
477 482
478static sector_t sysv_bmap(struct address_space *mapping, sector_t block) 483static sector_t sysv_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index 5a903da54551..0e44a6253352 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -347,7 +347,6 @@ static int complete_read_super(struct super_block *sb, int silent, int size)
347 sb->s_flags |= MS_RDONLY; 347 sb->s_flags |= MS_RDONLY;
348 if (sbi->s_truncate) 348 if (sbi->s_truncate)
349 sb->s_root->d_op = &sysv_dentry_operations; 349 sb->s_root->d_op = &sysv_dentry_operations;
350 sb->s_dirt = 1;
351 return 1; 350 return 1;
352} 351}
353 352
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 94cb9b4d76c2..bb55cdb394bf 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -136,9 +136,7 @@ extern unsigned long sysv_count_free_blocks(struct super_block *);
136 136
137/* itree.c */ 137/* itree.c */
138extern void sysv_truncate(struct inode *); 138extern void sysv_truncate(struct inode *);
139extern int __sysv_write_begin(struct file *file, struct address_space *mapping, 139extern int sysv_prepare_chunk(struct page *page, loff_t pos, unsigned len);
140 loff_t pos, unsigned len, unsigned flags,
141 struct page **pagep, void **fsdata);
142 140
143/* inode.c */ 141/* inode.c */
144extern struct inode *sysv_iget(struct super_block *, unsigned int); 142extern struct inode *sysv_iget(struct super_block *, unsigned int);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 12f445cee9f7..03ae894c45de 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -967,14 +967,15 @@ static int do_writepage(struct page *page, int len)
967 * the page locked, and it locks @ui_mutex. However, write-back does take inode 967 * the page locked, and it locks @ui_mutex. However, write-back does take inode
968 * @i_mutex, which means other VFS operations may be run on this inode at the 968 * @i_mutex, which means other VFS operations may be run on this inode at the
969 * same time. And the problematic one is truncation to smaller size, from where 969 * same time. And the problematic one is truncation to smaller size, from where
970 * we have to call 'simple_setsize()', which first changes @inode->i_size, then 970 * we have to call 'truncate_setsize()', which first changes @inode->i_size, then
971 * drops the truncated pages. And while dropping the pages, it takes the page 971 * drops the truncated pages. And while dropping the pages, it takes the page
972 * lock. This means that 'do_truncation()' cannot call 'simple_setsize()' with 972 * lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' with
973 * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This 973 * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This
974 * means that @inode->i_size is changed while @ui_mutex is unlocked. 974 * means that @inode->i_size is changed while @ui_mutex is unlocked.
975 * 975 *
976 * XXX: with the new truncate the above is not true anymore, the simple_setsize 976 * XXX(truncate): with the new truncate sequence this is not true anymore,
977 * calls can be replaced with the individual components. 977 * and the calls to truncate_setsize can be move around freely. They should
978 * be moved to the very end of the truncate sequence.
978 * 979 *
979 * But in 'ubifs_writepage()' we have to guarantee that we do not write beyond 980 * But in 'ubifs_writepage()' we have to guarantee that we do not write beyond
980 * inode size. How do we do this if @inode->i_size may became smaller while we 981 * inode size. How do we do this if @inode->i_size may became smaller while we
@@ -1128,9 +1129,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
1128 budgeted = 0; 1129 budgeted = 0;
1129 } 1130 }
1130 1131
1131 err = simple_setsize(inode, new_size); 1132 truncate_setsize(inode, new_size);
1132 if (err)
1133 goto out_budg;
1134 1133
1135 if (offset) { 1134 if (offset) {
1136 pgoff_t index = new_size >> PAGE_CACHE_SHIFT; 1135 pgoff_t index = new_size >> PAGE_CACHE_SHIFT;
@@ -1217,16 +1216,14 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
1217 1216
1218 if (attr->ia_valid & ATTR_SIZE) { 1217 if (attr->ia_valid & ATTR_SIZE) {
1219 dbg_gen("size %lld -> %lld", inode->i_size, new_size); 1218 dbg_gen("size %lld -> %lld", inode->i_size, new_size);
1220 err = simple_setsize(inode, new_size); 1219 truncate_setsize(inode, new_size);
1221 if (err)
1222 goto out;
1223 } 1220 }
1224 1221
1225 mutex_lock(&ui->ui_mutex); 1222 mutex_lock(&ui->ui_mutex);
1226 if (attr->ia_valid & ATTR_SIZE) { 1223 if (attr->ia_valid & ATTR_SIZE) {
1227 /* Truncation changes inode [mc]time */ 1224 /* Truncation changes inode [mc]time */
1228 inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); 1225 inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
1229 /* 'simple_setsize()' changed @i_size, update @ui_size */ 1226 /* 'truncate_setsize()' changed @i_size, update @ui_size */
1230 ui->ui_size = inode->i_size; 1227 ui->ui_size = inode->i_size;
1231 } 1228 }
1232 1229
@@ -1248,10 +1245,6 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
1248 if (IS_SYNC(inode)) 1245 if (IS_SYNC(inode))
1249 err = inode->i_sb->s_op->write_inode(inode, NULL); 1246 err = inode->i_sb->s_op->write_inode(inode, NULL);
1250 return err; 1247 return err;
1251
1252out:
1253 ubifs_release_budget(c, &req);
1254 return err;
1255} 1248}
1256 1249
1257int ubifs_setattr(struct dentry *dentry, struct iattr *attr) 1250int ubifs_setattr(struct dentry *dentry, struct iattr *attr)
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index ad7f67b827ea..0084a33c4c69 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -1457,13 +1457,13 @@ struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum)
1457 shft -= UBIFS_LPT_FANOUT_SHIFT; 1457 shft -= UBIFS_LPT_FANOUT_SHIFT;
1458 nnode = ubifs_get_nnode(c, nnode, iip); 1458 nnode = ubifs_get_nnode(c, nnode, iip);
1459 if (IS_ERR(nnode)) 1459 if (IS_ERR(nnode))
1460 return ERR_PTR(PTR_ERR(nnode)); 1460 return ERR_CAST(nnode);
1461 } 1461 }
1462 iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); 1462 iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
1463 shft -= UBIFS_LPT_FANOUT_SHIFT; 1463 shft -= UBIFS_LPT_FANOUT_SHIFT;
1464 pnode = ubifs_get_pnode(c, nnode, iip); 1464 pnode = ubifs_get_pnode(c, nnode, iip);
1465 if (IS_ERR(pnode)) 1465 if (IS_ERR(pnode))
1466 return ERR_PTR(PTR_ERR(pnode)); 1466 return ERR_CAST(pnode);
1467 iip = (i & (UBIFS_LPT_FANOUT - 1)); 1467 iip = (i & (UBIFS_LPT_FANOUT - 1));
1468 dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum, 1468 dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum,
1469 pnode->lprops[iip].free, pnode->lprops[iip].dirty, 1469 pnode->lprops[iip].free, pnode->lprops[iip].dirty,
@@ -1586,7 +1586,7 @@ struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum)
1586 nnode = c->nroot; 1586 nnode = c->nroot;
1587 nnode = dirty_cow_nnode(c, nnode); 1587 nnode = dirty_cow_nnode(c, nnode);
1588 if (IS_ERR(nnode)) 1588 if (IS_ERR(nnode))
1589 return ERR_PTR(PTR_ERR(nnode)); 1589 return ERR_CAST(nnode);
1590 i = lnum - c->main_first; 1590 i = lnum - c->main_first;
1591 shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT; 1591 shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT;
1592 for (h = 1; h < c->lpt_hght; h++) { 1592 for (h = 1; h < c->lpt_hght; h++) {
@@ -1594,19 +1594,19 @@ struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum)
1594 shft -= UBIFS_LPT_FANOUT_SHIFT; 1594 shft -= UBIFS_LPT_FANOUT_SHIFT;
1595 nnode = ubifs_get_nnode(c, nnode, iip); 1595 nnode = ubifs_get_nnode(c, nnode, iip);
1596 if (IS_ERR(nnode)) 1596 if (IS_ERR(nnode))
1597 return ERR_PTR(PTR_ERR(nnode)); 1597 return ERR_CAST(nnode);
1598 nnode = dirty_cow_nnode(c, nnode); 1598 nnode = dirty_cow_nnode(c, nnode);
1599 if (IS_ERR(nnode)) 1599 if (IS_ERR(nnode))
1600 return ERR_PTR(PTR_ERR(nnode)); 1600 return ERR_CAST(nnode);
1601 } 1601 }
1602 iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); 1602 iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
1603 shft -= UBIFS_LPT_FANOUT_SHIFT; 1603 shft -= UBIFS_LPT_FANOUT_SHIFT;
1604 pnode = ubifs_get_pnode(c, nnode, iip); 1604 pnode = ubifs_get_pnode(c, nnode, iip);
1605 if (IS_ERR(pnode)) 1605 if (IS_ERR(pnode))
1606 return ERR_PTR(PTR_ERR(pnode)); 1606 return ERR_CAST(pnode);
1607 pnode = dirty_cow_pnode(c, pnode); 1607 pnode = dirty_cow_pnode(c, pnode);
1608 if (IS_ERR(pnode)) 1608 if (IS_ERR(pnode))
1609 return ERR_PTR(PTR_ERR(pnode)); 1609 return ERR_CAST(pnode);
1610 iip = (i & (UBIFS_LPT_FANOUT - 1)); 1610 iip = (i & (UBIFS_LPT_FANOUT - 1));
1611 dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum, 1611 dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum,
1612 pnode->lprops[iip].free, pnode->lprops[iip].dirty, 1612 pnode->lprops[iip].free, pnode->lprops[iip].dirty,
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 13cb7a4237bf..d12535b7fc78 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -646,7 +646,7 @@ static struct ubifs_pnode *pnode_lookup(struct ubifs_info *c, int i)
646 shft -= UBIFS_LPT_FANOUT_SHIFT; 646 shft -= UBIFS_LPT_FANOUT_SHIFT;
647 nnode = ubifs_get_nnode(c, nnode, iip); 647 nnode = ubifs_get_nnode(c, nnode, iip);
648 if (IS_ERR(nnode)) 648 if (IS_ERR(nnode))
649 return ERR_PTR(PTR_ERR(nnode)); 649 return ERR_CAST(nnode);
650 } 650 }
651 iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); 651 iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
652 return ubifs_get_pnode(c, nnode, iip); 652 return ubifs_get_pnode(c, nnode, iip);
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 109c6ea03bb5..daae9e1f5382 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -24,7 +24,7 @@
24 * This file implements functions needed to recover from unclean un-mounts. 24 * This file implements functions needed to recover from unclean un-mounts.
25 * When UBIFS is mounted, it checks a flag on the master node to determine if 25 * When UBIFS is mounted, it checks a flag on the master node to determine if
26 * an un-mount was completed successfully. If not, the process of mounting 26 * an un-mount was completed successfully. If not, the process of mounting
27 * incorparates additional checking and fixing of on-flash data structures. 27 * incorporates additional checking and fixing of on-flash data structures.
28 * UBIFS always cleans away all remnants of an unclean un-mount, so that 28 * UBIFS always cleans away all remnants of an unclean un-mount, so that
29 * errors do not accumulate. However UBIFS defers recovery if it is mounted 29 * errors do not accumulate. However UBIFS defers recovery if it is mounted
30 * read-only, and the flash is not modified in that case. 30 * read-only, and the flash is not modified in that case.
@@ -1063,8 +1063,21 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
1063 } 1063 }
1064 err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2); 1064 err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2);
1065 if (err) { 1065 if (err) {
1066 if (err == -ENOSPC) 1066 /*
1067 dbg_err("could not find a dirty LEB"); 1067 * There are no dirty or empty LEBs subject to here being
1068 * enough for the index. Try to use
1069 * 'ubifs_find_free_leb_for_idx()', which will return any empty
1070 * LEBs (ignoring index requirements). If the index then
1071 * doesn't have enough LEBs the recovery commit will fail -
1072 * which is the same result anyway i.e. recovery fails. So
1073 * there is no problem ignoring index requirements and just
1074 * grabbing a free LEB since we have already established there
1075 * is not a dirty LEB we could have used instead.
1076 */
1077 if (err == -ENOSPC) {
1078 dbg_rcvry("could not find a dirty LEB");
1079 goto find_free;
1080 }
1068 return err; 1081 return err;
1069 } 1082 }
1070 ubifs_assert(!(lp.flags & LPROPS_INDEX)); 1083 ubifs_assert(!(lp.flags & LPROPS_INDEX));
@@ -1139,8 +1152,8 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
1139find_free: 1152find_free:
1140 /* 1153 /*
1141 * There is no GC head LEB or the free space in the GC head LEB is too 1154 * There is no GC head LEB or the free space in the GC head LEB is too
1142 * small. Allocate gc_lnum by calling 'ubifs_find_free_leb_for_idx()' so 1155 * small, or there are not dirty LEBs. Allocate gc_lnum by calling
1143 * GC is not run. 1156 * 'ubifs_find_free_leb_for_idx()' so GC is not run.
1144 */ 1157 */
1145 lnum = ubifs_find_free_leb_for_idx(c); 1158 lnum = ubifs_find_free_leb_for_idx(c);
1146 if (lnum < 0) { 1159 if (lnum < 0) {
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 4d2f2157dd3f..cd5900b85d38 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -327,7 +327,7 @@ static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc)
327 return err; 327 return err;
328} 328}
329 329
330static void ubifs_delete_inode(struct inode *inode) 330static void ubifs_evict_inode(struct inode *inode)
331{ 331{
332 int err; 332 int err;
333 struct ubifs_info *c = inode->i_sb->s_fs_info; 333 struct ubifs_info *c = inode->i_sb->s_fs_info;
@@ -343,9 +343,12 @@ static void ubifs_delete_inode(struct inode *inode)
343 343
344 dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode); 344 dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode);
345 ubifs_assert(!atomic_read(&inode->i_count)); 345 ubifs_assert(!atomic_read(&inode->i_count));
346 ubifs_assert(inode->i_nlink == 0);
347 346
348 truncate_inode_pages(&inode->i_data, 0); 347 truncate_inode_pages(&inode->i_data, 0);
348
349 if (inode->i_nlink)
350 goto done;
351
349 if (is_bad_inode(inode)) 352 if (is_bad_inode(inode))
350 goto out; 353 goto out;
351 354
@@ -367,7 +370,8 @@ out:
367 c->nospace = c->nospace_rp = 0; 370 c->nospace = c->nospace_rp = 0;
368 smp_wmb(); 371 smp_wmb();
369 } 372 }
370 clear_inode(inode); 373done:
374 end_writeback(inode);
371} 375}
372 376
373static void ubifs_dirty_inode(struct inode *inode) 377static void ubifs_dirty_inode(struct inode *inode)
@@ -1307,6 +1311,8 @@ static int mount_ubifs(struct ubifs_info *c)
1307 if (err) 1311 if (err)
1308 goto out_orphans; 1312 goto out_orphans;
1309 err = ubifs_rcvry_gc_commit(c); 1313 err = ubifs_rcvry_gc_commit(c);
1314 if (err)
1315 goto out_orphans;
1310 } else { 1316 } else {
1311 err = take_gc_lnum(c); 1317 err = take_gc_lnum(c);
1312 if (err) 1318 if (err)
@@ -1318,7 +1324,7 @@ static int mount_ubifs(struct ubifs_info *c)
1318 */ 1324 */
1319 err = ubifs_leb_unmap(c, c->gc_lnum); 1325 err = ubifs_leb_unmap(c, c->gc_lnum);
1320 if (err) 1326 if (err)
1321 return err; 1327 goto out_orphans;
1322 } 1328 }
1323 1329
1324 err = dbg_check_lprops(c); 1330 err = dbg_check_lprops(c);
@@ -1824,7 +1830,7 @@ const struct super_operations ubifs_super_operations = {
1824 .destroy_inode = ubifs_destroy_inode, 1830 .destroy_inode = ubifs_destroy_inode,
1825 .put_super = ubifs_put_super, 1831 .put_super = ubifs_put_super,
1826 .write_inode = ubifs_write_inode, 1832 .write_inode = ubifs_write_inode,
1827 .delete_inode = ubifs_delete_inode, 1833 .evict_inode = ubifs_evict_inode,
1828 .statfs = ubifs_statfs, 1834 .statfs = ubifs_statfs,
1829 .dirty_inode = ubifs_dirty_inode, 1835 .dirty_inode = ubifs_dirty_inode,
1830 .remount_fs = ubifs_remount_fs, 1836 .remount_fs = ubifs_remount_fs,
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 04310878f449..0c9876b396dd 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -379,7 +379,7 @@ struct ubifs_gced_idx_leb {
379 * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses 379 * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses
380 * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot 380 * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot
381 * make sure @inode->i_size is always changed under @ui_mutex, because it 381 * make sure @inode->i_size is always changed under @ui_mutex, because it
382 * cannot call 'simple_setsize()' with @ui_mutex locked, because it would deadlock 382 * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would deadlock
383 * with 'ubifs_writepage()' (see file.c). All the other inode fields are 383 * with 'ubifs_writepage()' (see file.c). All the other inode fields are
384 * changed under @ui_mutex, so they do not need "shadow" fields. Note, one 384 * changed under @ui_mutex, so they do not need "shadow" fields. Note, one
385 * could consider to rework locking and base it on "shadow" fields. 385 * could consider to rework locking and base it on "shadow" fields.
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 94e06d6bddbd..66b9e7e7e4c5 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -36,7 +36,6 @@
36#include <linux/pagemap.h> 36#include <linux/pagemap.h>
37#include <linux/buffer_head.h> 37#include <linux/buffer_head.h>
38#include <linux/aio.h> 38#include <linux/aio.h>
39#include <linux/smp_lock.h>
40 39
41#include "udf_i.h" 40#include "udf_i.h"
42#include "udf_sb.h" 41#include "udf_sb.h"
@@ -228,6 +227,28 @@ const struct file_operations udf_file_operations = {
228 .llseek = generic_file_llseek, 227 .llseek = generic_file_llseek,
229}; 228};
230 229
230static int udf_setattr(struct dentry *dentry, struct iattr *attr)
231{
232 struct inode *inode = dentry->d_inode;
233 int error;
234
235 error = inode_change_ok(inode, attr);
236 if (error)
237 return error;
238
239 if ((attr->ia_valid & ATTR_SIZE) &&
240 attr->ia_size != i_size_read(inode)) {
241 error = vmtruncate(inode, attr->ia_size);
242 if (error)
243 return error;
244 }
245
246 setattr_copy(inode, attr);
247 mark_inode_dirty(inode);
248 return 0;
249}
250
231const struct inode_operations udf_file_inode_operations = { 251const struct inode_operations udf_file_inode_operations = {
252 .setattr = udf_setattr,
232 .truncate = udf_truncate, 253 .truncate = udf_truncate,
233}; 254};
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 18cd7111185d..75d9304d0dc3 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -31,8 +31,6 @@ void udf_free_inode(struct inode *inode)
31 struct super_block *sb = inode->i_sb; 31 struct super_block *sb = inode->i_sb;
32 struct udf_sb_info *sbi = UDF_SB(sb); 32 struct udf_sb_info *sbi = UDF_SB(sb);
33 33
34 clear_inode(inode);
35
36 mutex_lock(&sbi->s_alloc_mutex); 34 mutex_lock(&sbi->s_alloc_mutex);
37 if (sbi->s_lvid_bh) { 35 if (sbi->s_lvid_bh) {
38 struct logicalVolIntegrityDescImpUse *lvidiu = 36 struct logicalVolIntegrityDescImpUse *lvidiu =
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 124852bcf6fe..fc48f37aa2dd 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -68,37 +68,23 @@ static void udf_update_extents(struct inode *,
68static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int); 68static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int);
69 69
70 70
71void udf_delete_inode(struct inode *inode) 71void udf_evict_inode(struct inode *inode)
72{
73 truncate_inode_pages(&inode->i_data, 0);
74
75 if (is_bad_inode(inode))
76 goto no_delete;
77
78 inode->i_size = 0;
79 udf_truncate(inode);
80 lock_kernel();
81
82 udf_update_inode(inode, IS_SYNC(inode));
83 udf_free_inode(inode);
84
85 unlock_kernel();
86 return;
87
88no_delete:
89 clear_inode(inode);
90}
91
92/*
93 * If we are going to release inode from memory, we truncate last inode extent
94 * to proper length. We could use drop_inode() but it's called under inode_lock
95 * and thus we cannot mark inode dirty there. We use clear_inode() but we have
96 * to make sure to write inode as it's not written automatically.
97 */
98void udf_clear_inode(struct inode *inode)
99{ 72{
100 struct udf_inode_info *iinfo = UDF_I(inode); 73 struct udf_inode_info *iinfo = UDF_I(inode);
74 int want_delete = 0;
75
76 truncate_inode_pages(&inode->i_data, 0);
101 77
78 if (!inode->i_nlink && !is_bad_inode(inode)) {
79 want_delete = 1;
80 inode->i_size = 0;
81 udf_truncate(inode);
82 lock_kernel();
83 udf_update_inode(inode, IS_SYNC(inode));
84 unlock_kernel();
85 }
86 invalidate_inode_buffers(inode);
87 end_writeback(inode);
102 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && 88 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB &&
103 inode->i_size != iinfo->i_lenExtents) { 89 inode->i_size != iinfo->i_lenExtents) {
104 printk(KERN_WARNING "UDF-fs (%s): Inode %lu (mode %o) has " 90 printk(KERN_WARNING "UDF-fs (%s): Inode %lu (mode %o) has "
@@ -108,9 +94,13 @@ void udf_clear_inode(struct inode *inode)
108 (unsigned long long)inode->i_size, 94 (unsigned long long)inode->i_size,
109 (unsigned long long)iinfo->i_lenExtents); 95 (unsigned long long)iinfo->i_lenExtents);
110 } 96 }
111
112 kfree(iinfo->i_ext.i_data); 97 kfree(iinfo->i_ext.i_data);
113 iinfo->i_ext.i_data = NULL; 98 iinfo->i_ext.i_data = NULL;
99 if (want_delete) {
100 lock_kernel();
101 udf_free_inode(inode);
102 unlock_kernel();
103 }
114} 104}
115 105
116static int udf_writepage(struct page *page, struct writeback_control *wbc) 106static int udf_writepage(struct page *page, struct writeback_control *wbc)
@@ -127,9 +117,16 @@ static int udf_write_begin(struct file *file, struct address_space *mapping,
127 loff_t pos, unsigned len, unsigned flags, 117 loff_t pos, unsigned len, unsigned flags,
128 struct page **pagep, void **fsdata) 118 struct page **pagep, void **fsdata)
129{ 119{
130 *pagep = NULL; 120 int ret;
131 return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 121
132 udf_get_block); 122 ret = block_write_begin(mapping, pos, len, flags, pagep, udf_get_block);
123 if (unlikely(ret)) {
124 loff_t isize = mapping->host->i_size;
125 if (pos + len > isize)
126 vmtruncate(mapping->host, isize);
127 }
128
129 return ret;
133} 130}
134 131
135static sector_t udf_bmap(struct address_space *mapping, sector_t block) 132static sector_t udf_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 612d1e2e285a..65412d84a45d 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -175,8 +175,7 @@ static const struct super_operations udf_sb_ops = {
175 .alloc_inode = udf_alloc_inode, 175 .alloc_inode = udf_alloc_inode,
176 .destroy_inode = udf_destroy_inode, 176 .destroy_inode = udf_destroy_inode,
177 .write_inode = udf_write_inode, 177 .write_inode = udf_write_inode,
178 .delete_inode = udf_delete_inode, 178 .evict_inode = udf_evict_inode,
179 .clear_inode = udf_clear_inode,
180 .put_super = udf_put_super, 179 .put_super = udf_put_super,
181 .sync_fs = udf_sync_fs, 180 .sync_fs = udf_sync_fs,
182 .statfs = udf_statfs, 181 .statfs = udf_statfs,
@@ -1579,9 +1578,7 @@ static int udf_load_sequence(struct super_block *sb, struct buffer_head *bh,
1579{ 1578{
1580 struct anchorVolDescPtr *anchor; 1579 struct anchorVolDescPtr *anchor;
1581 long main_s, main_e, reserve_s, reserve_e; 1580 long main_s, main_e, reserve_s, reserve_e;
1582 struct udf_sb_info *sbi;
1583 1581
1584 sbi = UDF_SB(sb);
1585 anchor = (struct anchorVolDescPtr *)bh->b_data; 1582 anchor = (struct anchorVolDescPtr *)bh->b_data;
1586 1583
1587 /* Locate the main sequence */ 1584 /* Locate the main sequence */
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 2bac0354891f..6995ab1f4305 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -139,8 +139,7 @@ extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *);
139extern struct buffer_head *udf_bread(struct inode *, int, int, int *); 139extern struct buffer_head *udf_bread(struct inode *, int, int, int *);
140extern void udf_truncate(struct inode *); 140extern void udf_truncate(struct inode *);
141extern void udf_read_inode(struct inode *); 141extern void udf_read_inode(struct inode *);
142extern void udf_delete_inode(struct inode *); 142extern void udf_evict_inode(struct inode *);
143extern void udf_clear_inode(struct inode *);
144extern int udf_write_inode(struct inode *, struct writeback_control *wbc); 143extern int udf_write_inode(struct inode *, struct writeback_control *wbc);
145extern long udf_block_map(struct inode *, sector_t); 144extern long udf_block_map(struct inode *, sector_t);
146extern int udf_extend_file(struct inode *, struct extent_position *, 145extern int udf_extend_file(struct inode *, struct extent_position *,
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index ec784756dc65..dbc90994715a 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -95,8 +95,7 @@ void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de,
95 int err; 95 int err;
96 96
97 lock_page(page); 97 lock_page(page);
98 err = __ufs_write_begin(NULL, page->mapping, pos, len, 98 err = ufs_prepare_chunk(page, pos, len);
99 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
100 BUG_ON(err); 99 BUG_ON(err);
101 100
102 de->d_ino = cpu_to_fs32(dir->i_sb, inode->i_ino); 101 de->d_ino = cpu_to_fs32(dir->i_sb, inode->i_ino);
@@ -381,8 +380,7 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode)
381got_it: 380got_it:
382 pos = page_offset(page) + 381 pos = page_offset(page) +
383 (char*)de - (char*)page_address(page); 382 (char*)de - (char*)page_address(page);
384 err = __ufs_write_begin(NULL, page->mapping, pos, rec_len, 383 err = ufs_prepare_chunk(page, pos, rec_len);
385 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
386 if (err) 384 if (err)
387 goto out_unlock; 385 goto out_unlock;
388 if (de->d_ino) { 386 if (de->d_ino) {
@@ -518,7 +516,6 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
518 struct page * page) 516 struct page * page)
519{ 517{
520 struct super_block *sb = inode->i_sb; 518 struct super_block *sb = inode->i_sb;
521 struct address_space *mapping = page->mapping;
522 char *kaddr = page_address(page); 519 char *kaddr = page_address(page);
523 unsigned from = ((char*)dir - kaddr) & ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1); 520 unsigned from = ((char*)dir - kaddr) & ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1);
524 unsigned to = ((char*)dir - kaddr) + fs16_to_cpu(sb, dir->d_reclen); 521 unsigned to = ((char*)dir - kaddr) + fs16_to_cpu(sb, dir->d_reclen);
@@ -549,8 +546,7 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
549 546
550 pos = page_offset(page) + from; 547 pos = page_offset(page) + from;
551 lock_page(page); 548 lock_page(page);
552 err = __ufs_write_begin(NULL, mapping, pos, to - from, 549 err = ufs_prepare_chunk(page, pos, to - from);
553 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
554 BUG_ON(err); 550 BUG_ON(err);
555 if (pde) 551 if (pde)
556 pde->d_reclen = cpu_to_fs16(sb, to - from); 552 pde->d_reclen = cpu_to_fs16(sb, to - from);
@@ -577,8 +573,7 @@ int ufs_make_empty(struct inode * inode, struct inode *dir)
577 if (!page) 573 if (!page)
578 return -ENOMEM; 574 return -ENOMEM;
579 575
580 err = __ufs_write_begin(NULL, mapping, 0, chunk_size, 576 err = ufs_prepare_chunk(page, 0, chunk_size);
581 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
582 if (err) { 577 if (err) {
583 unlock_page(page); 578 unlock_page(page);
584 goto fail; 579 goto fail;
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 594480e537d2..428017e018fe 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -94,8 +94,6 @@ void ufs_free_inode (struct inode * inode)
94 94
95 is_directory = S_ISDIR(inode->i_mode); 95 is_directory = S_ISDIR(inode->i_mode);
96 96
97 clear_inode (inode);
98
99 if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_iusedoff, bit)) 97 if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_iusedoff, bit))
100 ufs_error(sb, "ufs_free_inode", "bit already cleared for inode %u", ino); 98 ufs_error(sb, "ufs_free_inode", "bit already cleared for inode %u", ino);
101 else { 99 else {
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 73fe773aa034..2b251f2093af 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -558,20 +558,26 @@ static int ufs_readpage(struct file *file, struct page *page)
558 return block_read_full_page(page,ufs_getfrag_block); 558 return block_read_full_page(page,ufs_getfrag_block);
559} 559}
560 560
561int __ufs_write_begin(struct file *file, struct address_space *mapping, 561int ufs_prepare_chunk(struct page *page, loff_t pos, unsigned len)
562 loff_t pos, unsigned len, unsigned flags,
563 struct page **pagep, void **fsdata)
564{ 562{
565 return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 563 return __block_write_begin(page, pos, len, ufs_getfrag_block);
566 ufs_getfrag_block);
567} 564}
568 565
569static int ufs_write_begin(struct file *file, struct address_space *mapping, 566static int ufs_write_begin(struct file *file, struct address_space *mapping,
570 loff_t pos, unsigned len, unsigned flags, 567 loff_t pos, unsigned len, unsigned flags,
571 struct page **pagep, void **fsdata) 568 struct page **pagep, void **fsdata)
572{ 569{
573 *pagep = NULL; 570 int ret;
574 return __ufs_write_begin(file, mapping, pos, len, flags, pagep, fsdata); 571
572 ret = block_write_begin(mapping, pos, len, flags, pagep,
573 ufs_getfrag_block);
574 if (unlikely(ret)) {
575 loff_t isize = mapping->host->i_size;
576 if (pos + len > isize)
577 vmtruncate(mapping->host, isize);
578 }
579
580 return ret;
575} 581}
576 582
577static sector_t ufs_bmap(struct address_space *mapping, sector_t block) 583static sector_t ufs_bmap(struct address_space *mapping, sector_t block)
@@ -905,24 +911,33 @@ int ufs_sync_inode (struct inode *inode)
905 return ufs_update_inode (inode, 1); 911 return ufs_update_inode (inode, 1);
906} 912}
907 913
908void ufs_delete_inode (struct inode * inode) 914void ufs_evict_inode(struct inode * inode)
909{ 915{
910 loff_t old_i_size; 916 int want_delete = 0;
917
918 if (!inode->i_nlink && !is_bad_inode(inode))
919 want_delete = 1;
911 920
912 truncate_inode_pages(&inode->i_data, 0); 921 truncate_inode_pages(&inode->i_data, 0);
913 if (is_bad_inode(inode)) 922 if (want_delete) {
914 goto no_delete; 923 loff_t old_i_size;
915 /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/ 924 /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/
916 lock_kernel(); 925 lock_kernel();
917 mark_inode_dirty(inode); 926 mark_inode_dirty(inode);
918 ufs_update_inode(inode, IS_SYNC(inode)); 927 ufs_update_inode(inode, IS_SYNC(inode));
919 old_i_size = inode->i_size; 928 old_i_size = inode->i_size;
920 inode->i_size = 0; 929 inode->i_size = 0;
921 if (inode->i_blocks && ufs_truncate(inode, old_i_size)) 930 if (inode->i_blocks && ufs_truncate(inode, old_i_size))
922 ufs_warning(inode->i_sb, __func__, "ufs_truncate failed\n"); 931 ufs_warning(inode->i_sb, __func__, "ufs_truncate failed\n");
923 ufs_free_inode (inode); 932 unlock_kernel();
924 unlock_kernel(); 933 }
925 return; 934
926no_delete: 935 invalidate_inode_buffers(inode);
927 clear_inode(inode); /* We must guarantee clearing of inode... */ 936 end_writeback(inode);
937
938 if (want_delete) {
939 lock_kernel();
940 ufs_free_inode (inode);
941 unlock_kernel();
942 }
928} 943}
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 3ec5a9eb6efb..d510c1b91817 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1440,7 +1440,7 @@ static const struct super_operations ufs_super_ops = {
1440 .alloc_inode = ufs_alloc_inode, 1440 .alloc_inode = ufs_alloc_inode,
1441 .destroy_inode = ufs_destroy_inode, 1441 .destroy_inode = ufs_destroy_inode,
1442 .write_inode = ufs_write_inode, 1442 .write_inode = ufs_write_inode,
1443 .delete_inode = ufs_delete_inode, 1443 .evict_inode = ufs_evict_inode,
1444 .put_super = ufs_put_super, 1444 .put_super = ufs_put_super,
1445 .write_super = ufs_write_super, 1445 .write_super = ufs_write_super,
1446 .sync_fs = ufs_sync_fs, 1446 .sync_fs = ufs_sync_fs,
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index 589e01a465ba..34d5cb135320 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -500,11 +500,6 @@ out:
500 return err; 500 return err;
501} 501}
502 502
503/*
504 * TODO:
505 * - truncate case should use proper ordering instead of using
506 * simple_setsize
507 */
508int ufs_setattr(struct dentry *dentry, struct iattr *attr) 503int ufs_setattr(struct dentry *dentry, struct iattr *attr)
509{ 504{
510 struct inode *inode = dentry->d_inode; 505 struct inode *inode = dentry->d_inode;
@@ -518,14 +513,17 @@ int ufs_setattr(struct dentry *dentry, struct iattr *attr)
518 if (ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) { 513 if (ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) {
519 loff_t old_i_size = inode->i_size; 514 loff_t old_i_size = inode->i_size;
520 515
521 error = simple_setsize(inode, attr->ia_size); 516 /* XXX(truncate): truncate_setsize should be called last */
522 if (error) 517 truncate_setsize(inode, attr->ia_size);
523 return error; 518
524 error = ufs_truncate(inode, old_i_size); 519 error = ufs_truncate(inode, old_i_size);
525 if (error) 520 if (error)
526 return error; 521 return error;
527 } 522 }
528 return inode_setattr(inode, attr); 523
524 setattr_copy(inode, attr);
525 mark_inode_dirty(inode);
526 return 0;
529} 527}
530 528
531const struct inode_operations ufs_file_inode_operations = { 529const struct inode_operations ufs_file_inode_operations = {
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 179ae6b3180a..c08782e1b48a 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -108,7 +108,7 @@ extern struct inode * ufs_new_inode (struct inode *, int);
108extern struct inode *ufs_iget(struct super_block *, unsigned long); 108extern struct inode *ufs_iget(struct super_block *, unsigned long);
109extern int ufs_write_inode (struct inode *, struct writeback_control *); 109extern int ufs_write_inode (struct inode *, struct writeback_control *);
110extern int ufs_sync_inode (struct inode *); 110extern int ufs_sync_inode (struct inode *);
111extern void ufs_delete_inode (struct inode *); 111extern void ufs_evict_inode (struct inode *);
112extern struct buffer_head * ufs_bread (struct inode *, unsigned, int, int *); 112extern struct buffer_head * ufs_bread (struct inode *, unsigned, int, int *);
113extern int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create); 113extern int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create);
114 114
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 23ceed8c8fb9..0466036912f1 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -257,9 +257,7 @@ ufs_set_inode_gid(struct super_block *sb, struct ufs_inode *inode, u32 value)
257 257
258extern dev_t ufs_get_inode_dev(struct super_block *, struct ufs_inode_info *); 258extern dev_t ufs_get_inode_dev(struct super_block *, struct ufs_inode_info *);
259extern void ufs_set_inode_dev(struct super_block *, struct ufs_inode_info *, dev_t); 259extern void ufs_set_inode_dev(struct super_block *, struct ufs_inode_info *, dev_t);
260extern int __ufs_write_begin(struct file *file, struct address_space *mapping, 260extern int ufs_prepare_chunk(struct page *page, loff_t pos, unsigned len);
261 loff_t pos, unsigned len, unsigned flags,
262 struct page **pagep, void **fsdata);
263 261
264/* 262/*
265 * These functions manipulate ufs buffers 263 * These functions manipulate ufs buffers
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index c8fb13f83b3f..0dce969d6cad 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -87,11 +87,9 @@ xfs-y += xfs_alloc.o \
87 xfs_trans_buf.o \ 87 xfs_trans_buf.o \
88 xfs_trans_extfree.o \ 88 xfs_trans_extfree.o \
89 xfs_trans_inode.o \ 89 xfs_trans_inode.o \
90 xfs_trans_item.o \
91 xfs_utils.o \ 90 xfs_utils.o \
92 xfs_vnodeops.o \ 91 xfs_vnodeops.o \
93 xfs_rw.o \ 92 xfs_rw.o
94 xfs_dmops.o
95 93
96xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o 94xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o
97 95
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index 9f769b5b38fc..b2771862fd3d 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -225,7 +225,7 @@ xfs_check_acl(struct inode *inode, int mask)
225 struct posix_acl *acl; 225 struct posix_acl *acl;
226 int error = -EAGAIN; 226 int error = -EAGAIN;
227 227
228 xfs_itrace_entry(ip); 228 trace_xfs_check_acl(ip);
229 229
230 /* 230 /*
231 * If there is no attribute fork no ACL exists on this inode and 231 * If there is no attribute fork no ACL exists on this inode and
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 34640d6dbdcb..15412fe15c3a 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -21,19 +21,12 @@
21#include "xfs_inum.h" 21#include "xfs_inum.h"
22#include "xfs_sb.h" 22#include "xfs_sb.h"
23#include "xfs_ag.h" 23#include "xfs_ag.h"
24#include "xfs_dir2.h"
25#include "xfs_trans.h" 24#include "xfs_trans.h"
26#include "xfs_dmapi.h"
27#include "xfs_mount.h" 25#include "xfs_mount.h"
28#include "xfs_bmap_btree.h" 26#include "xfs_bmap_btree.h"
29#include "xfs_alloc_btree.h"
30#include "xfs_ialloc_btree.h"
31#include "xfs_dir2_sf.h"
32#include "xfs_attr_sf.h"
33#include "xfs_dinode.h" 27#include "xfs_dinode.h"
34#include "xfs_inode.h" 28#include "xfs_inode.h"
35#include "xfs_alloc.h" 29#include "xfs_alloc.h"
36#include "xfs_btree.h"
37#include "xfs_error.h" 30#include "xfs_error.h"
38#include "xfs_rw.h" 31#include "xfs_rw.h"
39#include "xfs_iomap.h" 32#include "xfs_iomap.h"
@@ -92,18 +85,15 @@ void
92xfs_count_page_state( 85xfs_count_page_state(
93 struct page *page, 86 struct page *page,
94 int *delalloc, 87 int *delalloc,
95 int *unmapped,
96 int *unwritten) 88 int *unwritten)
97{ 89{
98 struct buffer_head *bh, *head; 90 struct buffer_head *bh, *head;
99 91
100 *delalloc = *unmapped = *unwritten = 0; 92 *delalloc = *unwritten = 0;
101 93
102 bh = head = page_buffers(page); 94 bh = head = page_buffers(page);
103 do { 95 do {
104 if (buffer_uptodate(bh) && !buffer_mapped(bh)) 96 if (buffer_unwritten(bh))
105 (*unmapped) = 1;
106 else if (buffer_unwritten(bh))
107 (*unwritten) = 1; 97 (*unwritten) = 1;
108 else if (buffer_delay(bh)) 98 else if (buffer_delay(bh))
109 (*delalloc) = 1; 99 (*delalloc) = 1;
@@ -212,23 +202,17 @@ xfs_setfilesize(
212} 202}
213 203
214/* 204/*
215 * Schedule IO completion handling on a xfsdatad if this was 205 * Schedule IO completion handling on the final put of an ioend.
216 * the final hold on this ioend. If we are asked to wait,
217 * flush the workqueue.
218 */ 206 */
219STATIC void 207STATIC void
220xfs_finish_ioend( 208xfs_finish_ioend(
221 xfs_ioend_t *ioend, 209 struct xfs_ioend *ioend)
222 int wait)
223{ 210{
224 if (atomic_dec_and_test(&ioend->io_remaining)) { 211 if (atomic_dec_and_test(&ioend->io_remaining)) {
225 struct workqueue_struct *wq; 212 if (ioend->io_type == IO_UNWRITTEN)
226 213 queue_work(xfsconvertd_workqueue, &ioend->io_work);
227 wq = (ioend->io_type == IO_UNWRITTEN) ? 214 else
228 xfsconvertd_workqueue : xfsdatad_workqueue; 215 queue_work(xfsdatad_workqueue, &ioend->io_work);
229 queue_work(wq, &ioend->io_work);
230 if (wait)
231 flush_workqueue(wq);
232 } 216 }
233} 217}
234 218
@@ -272,11 +256,25 @@ xfs_end_io(
272 */ 256 */
273 if (error == EAGAIN) { 257 if (error == EAGAIN) {
274 atomic_inc(&ioend->io_remaining); 258 atomic_inc(&ioend->io_remaining);
275 xfs_finish_ioend(ioend, 0); 259 xfs_finish_ioend(ioend);
276 /* ensure we don't spin on blocked ioends */ 260 /* ensure we don't spin on blocked ioends */
277 delay(1); 261 delay(1);
278 } else 262 } else {
263 if (ioend->io_iocb)
264 aio_complete(ioend->io_iocb, ioend->io_result, 0);
279 xfs_destroy_ioend(ioend); 265 xfs_destroy_ioend(ioend);
266 }
267}
268
269/*
270 * Call IO completion handling in caller context on the final put of an ioend.
271 */
272STATIC void
273xfs_finish_ioend_sync(
274 struct xfs_ioend *ioend)
275{
276 if (atomic_dec_and_test(&ioend->io_remaining))
277 xfs_end_io(&ioend->io_work);
280} 278}
281 279
282/* 280/*
@@ -309,6 +307,8 @@ xfs_alloc_ioend(
309 atomic_inc(&XFS_I(ioend->io_inode)->i_iocount); 307 atomic_inc(&XFS_I(ioend->io_inode)->i_iocount);
310 ioend->io_offset = 0; 308 ioend->io_offset = 0;
311 ioend->io_size = 0; 309 ioend->io_size = 0;
310 ioend->io_iocb = NULL;
311 ioend->io_result = 0;
312 312
313 INIT_WORK(&ioend->io_work, xfs_end_io); 313 INIT_WORK(&ioend->io_work, xfs_end_io);
314 return ioend; 314 return ioend;
@@ -358,7 +358,7 @@ xfs_end_bio(
358 bio->bi_end_io = NULL; 358 bio->bi_end_io = NULL;
359 bio_put(bio); 359 bio_put(bio);
360 360
361 xfs_finish_ioend(ioend, 0); 361 xfs_finish_ioend(ioend);
362} 362}
363 363
364STATIC void 364STATIC void
@@ -500,7 +500,7 @@ xfs_submit_ioend(
500 } 500 }
501 if (bio) 501 if (bio)
502 xfs_submit_ioend_bio(wbc, ioend, bio); 502 xfs_submit_ioend_bio(wbc, ioend, bio);
503 xfs_finish_ioend(ioend, 0); 503 xfs_finish_ioend(ioend);
504 } while ((ioend = next) != NULL); 504 } while ((ioend = next) != NULL);
505} 505}
506 506
@@ -614,31 +614,30 @@ xfs_map_at_offset(
614STATIC unsigned int 614STATIC unsigned int
615xfs_probe_page( 615xfs_probe_page(
616 struct page *page, 616 struct page *page,
617 unsigned int pg_offset, 617 unsigned int pg_offset)
618 int mapped)
619{ 618{
619 struct buffer_head *bh, *head;
620 int ret = 0; 620 int ret = 0;
621 621
622 if (PageWriteback(page)) 622 if (PageWriteback(page))
623 return 0; 623 return 0;
624 if (!PageDirty(page))
625 return 0;
626 if (!page->mapping)
627 return 0;
628 if (!page_has_buffers(page))
629 return 0;
624 630
625 if (page->mapping && PageDirty(page)) { 631 bh = head = page_buffers(page);
626 if (page_has_buffers(page)) { 632 do {
627 struct buffer_head *bh, *head; 633 if (!buffer_uptodate(bh))
628 634 break;
629 bh = head = page_buffers(page); 635 if (!buffer_mapped(bh))
630 do { 636 break;
631 if (!buffer_uptodate(bh)) 637 ret += bh->b_size;
632 break; 638 if (ret >= pg_offset)
633 if (mapped != buffer_mapped(bh)) 639 break;
634 break; 640 } while ((bh = bh->b_this_page) != head);
635 ret += bh->b_size;
636 if (ret >= pg_offset)
637 break;
638 } while ((bh = bh->b_this_page) != head);
639 } else
640 ret = mapped ? 0 : PAGE_CACHE_SIZE;
641 }
642 641
643 return ret; 642 return ret;
644} 643}
@@ -648,8 +647,7 @@ xfs_probe_cluster(
648 struct inode *inode, 647 struct inode *inode,
649 struct page *startpage, 648 struct page *startpage,
650 struct buffer_head *bh, 649 struct buffer_head *bh,
651 struct buffer_head *head, 650 struct buffer_head *head)
652 int mapped)
653{ 651{
654 struct pagevec pvec; 652 struct pagevec pvec;
655 pgoff_t tindex, tlast, tloff; 653 pgoff_t tindex, tlast, tloff;
@@ -658,7 +656,7 @@ xfs_probe_cluster(
658 656
659 /* First sum forwards in this page */ 657 /* First sum forwards in this page */
660 do { 658 do {
661 if (!buffer_uptodate(bh) || (mapped != buffer_mapped(bh))) 659 if (!buffer_uptodate(bh) || !buffer_mapped(bh))
662 return total; 660 return total;
663 total += bh->b_size; 661 total += bh->b_size;
664 } while ((bh = bh->b_this_page) != head); 662 } while ((bh = bh->b_this_page) != head);
@@ -692,7 +690,7 @@ xfs_probe_cluster(
692 pg_offset = PAGE_CACHE_SIZE; 690 pg_offset = PAGE_CACHE_SIZE;
693 691
694 if (page->index == tindex && trylock_page(page)) { 692 if (page->index == tindex && trylock_page(page)) {
695 pg_len = xfs_probe_page(page, pg_offset, mapped); 693 pg_len = xfs_probe_page(page, pg_offset);
696 unlock_page(page); 694 unlock_page(page);
697 } 695 }
698 696
@@ -761,7 +759,6 @@ xfs_convert_page(
761 struct xfs_bmbt_irec *imap, 759 struct xfs_bmbt_irec *imap,
762 xfs_ioend_t **ioendp, 760 xfs_ioend_t **ioendp,
763 struct writeback_control *wbc, 761 struct writeback_control *wbc,
764 int startio,
765 int all_bh) 762 int all_bh)
766{ 763{
767 struct buffer_head *bh, *head; 764 struct buffer_head *bh, *head;
@@ -832,19 +829,14 @@ xfs_convert_page(
832 ASSERT(imap->br_startblock != DELAYSTARTBLOCK); 829 ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
833 830
834 xfs_map_at_offset(inode, bh, imap, offset); 831 xfs_map_at_offset(inode, bh, imap, offset);
835 if (startio) { 832 xfs_add_to_ioend(inode, bh, offset, type,
836 xfs_add_to_ioend(inode, bh, offset, 833 ioendp, done);
837 type, ioendp, done); 834
838 } else {
839 set_buffer_dirty(bh);
840 unlock_buffer(bh);
841 mark_buffer_dirty(bh);
842 }
843 page_dirty--; 835 page_dirty--;
844 count++; 836 count++;
845 } else { 837 } else {
846 type = IO_NEW; 838 type = IO_NEW;
847 if (buffer_mapped(bh) && all_bh && startio) { 839 if (buffer_mapped(bh) && all_bh) {
848 lock_buffer(bh); 840 lock_buffer(bh);
849 xfs_add_to_ioend(inode, bh, offset, 841 xfs_add_to_ioend(inode, bh, offset,
850 type, ioendp, done); 842 type, ioendp, done);
@@ -859,14 +851,12 @@ xfs_convert_page(
859 if (uptodate && bh == head) 851 if (uptodate && bh == head)
860 SetPageUptodate(page); 852 SetPageUptodate(page);
861 853
862 if (startio) { 854 if (count) {
863 if (count) { 855 wbc->nr_to_write--;
864 wbc->nr_to_write--; 856 if (wbc->nr_to_write <= 0)
865 if (wbc->nr_to_write <= 0) 857 done = 1;
866 done = 1;
867 }
868 xfs_start_page_writeback(page, !page_dirty, count);
869 } 858 }
859 xfs_start_page_writeback(page, !page_dirty, count);
870 860
871 return done; 861 return done;
872 fail_unlock_page: 862 fail_unlock_page:
@@ -886,7 +876,6 @@ xfs_cluster_write(
886 struct xfs_bmbt_irec *imap, 876 struct xfs_bmbt_irec *imap,
887 xfs_ioend_t **ioendp, 877 xfs_ioend_t **ioendp,
888 struct writeback_control *wbc, 878 struct writeback_control *wbc,
889 int startio,
890 int all_bh, 879 int all_bh,
891 pgoff_t tlast) 880 pgoff_t tlast)
892{ 881{
@@ -902,7 +891,7 @@ xfs_cluster_write(
902 891
903 for (i = 0; i < pagevec_count(&pvec); i++) { 892 for (i = 0; i < pagevec_count(&pvec); i++) {
904 done = xfs_convert_page(inode, pvec.pages[i], tindex++, 893 done = xfs_convert_page(inode, pvec.pages[i], tindex++,
905 imap, ioendp, wbc, startio, all_bh); 894 imap, ioendp, wbc, all_bh);
906 if (done) 895 if (done)
907 break; 896 break;
908 } 897 }
@@ -981,7 +970,7 @@ xfs_aops_discard_page(
981 */ 970 */
982 error = xfs_bmapi(NULL, ip, offset_fsb, 1, 971 error = xfs_bmapi(NULL, ip, offset_fsb, 1,
983 XFS_BMAPI_ENTIRE, NULL, 0, &imap, 972 XFS_BMAPI_ENTIRE, NULL, 0, &imap,
984 &nimaps, NULL, NULL); 973 &nimaps, NULL);
985 974
986 if (error) { 975 if (error) {
987 /* something screwed, just bail */ 976 /* something screwed, just bail */
@@ -1009,7 +998,7 @@ xfs_aops_discard_page(
1009 */ 998 */
1010 xfs_bmap_init(&flist, &firstblock); 999 xfs_bmap_init(&flist, &firstblock);
1011 error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock, 1000 error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock,
1012 &flist, NULL, &done); 1001 &flist, &done);
1013 1002
1014 ASSERT(!flist.xbf_count && !flist.xbf_first); 1003 ASSERT(!flist.xbf_count && !flist.xbf_first);
1015 if (error) { 1004 if (error) {
@@ -1032,50 +1021,66 @@ out_invalidate:
1032} 1021}
1033 1022
1034/* 1023/*
1035 * Calling this without startio set means we are being asked to make a dirty 1024 * Write out a dirty page.
1036 * page ready for freeing it's buffers. When called with startio set then
1037 * we are coming from writepage.
1038 * 1025 *
1039 * When called with startio set it is important that we write the WHOLE 1026 * For delalloc space on the page we need to allocate space and flush it.
1040 * page if possible. 1027 * For unwritten space on the page we need to start the conversion to
1041 * The bh->b_state's cannot know if any of the blocks or which block for 1028 * regular allocated space.
1042 * that matter are dirty due to mmap writes, and therefore bh uptodate is 1029 * For any other dirty buffer heads on the page we should flush them.
1043 * only valid if the page itself isn't completely uptodate. Some layers 1030 *
1044 * may clear the page dirty flag prior to calling write page, under the 1031 * If we detect that a transaction would be required to flush the page, we
1045 * assumption the entire page will be written out; by not writing out the 1032 * have to check the process flags first, if we are already in a transaction
1046 * whole page the page can be reused before all valid dirty data is 1033 * or disk I/O during allocations is off, we need to fail the writepage and
1047 * written out. Note: in the case of a page that has been dirty'd by 1034 * redirty the page.
1048 * mapwrite and but partially setup by block_prepare_write the
1049 * bh->b_states's will not agree and only ones setup by BPW/BCW will have
1050 * valid state, thus the whole page must be written out thing.
1051 */ 1035 */
1052
1053STATIC int 1036STATIC int
1054xfs_page_state_convert( 1037xfs_vm_writepage(
1055 struct inode *inode, 1038 struct page *page,
1056 struct page *page, 1039 struct writeback_control *wbc)
1057 struct writeback_control *wbc,
1058 int startio,
1059 int unmapped) /* also implies page uptodate */
1060{ 1040{
1041 struct inode *inode = page->mapping->host;
1042 int delalloc, unwritten;
1061 struct buffer_head *bh, *head; 1043 struct buffer_head *bh, *head;
1062 struct xfs_bmbt_irec imap; 1044 struct xfs_bmbt_irec imap;
1063 xfs_ioend_t *ioend = NULL, *iohead = NULL; 1045 xfs_ioend_t *ioend = NULL, *iohead = NULL;
1064 loff_t offset; 1046 loff_t offset;
1065 unsigned long p_offset = 0;
1066 unsigned int type; 1047 unsigned int type;
1067 __uint64_t end_offset; 1048 __uint64_t end_offset;
1068 pgoff_t end_index, last_index; 1049 pgoff_t end_index, last_index;
1069 ssize_t size, len; 1050 ssize_t size, len;
1070 int flags, err, imap_valid = 0, uptodate = 1; 1051 int flags, err, imap_valid = 0, uptodate = 1;
1071 int page_dirty, count = 0; 1052 int count = 0;
1072 int trylock = 0; 1053 int all_bh = 0;
1073 int all_bh = unmapped;
1074 1054
1075 if (startio) { 1055 trace_xfs_writepage(inode, page, 0);
1076 if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking) 1056
1077 trylock |= BMAPI_TRYLOCK; 1057 ASSERT(page_has_buffers(page));
1078 } 1058
1059 /*
1060 * Refuse to write the page out if we are called from reclaim context.
1061 *
1062 * This avoids stack overflows when called from deeply used stacks in
1063 * random callers for direct reclaim or memcg reclaim. We explicitly
1064 * allow reclaim from kswapd as the stack usage there is relatively low.
1065 *
1066 * This should really be done by the core VM, but until that happens
1067 * filesystems like XFS, btrfs and ext4 have to take care of this
1068 * by themselves.
1069 */
1070 if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC)
1071 goto out_fail;
1072
1073 /*
1074 * We need a transaction if there are delalloc or unwritten buffers
1075 * on the page.
1076 *
1077 * If we need a transaction and the process flags say we are already
1078 * in a transaction, or no IO is allowed then mark the page dirty
1079 * again and leave the page as is.
1080 */
1081 xfs_count_page_state(page, &delalloc, &unwritten);
1082 if ((current->flags & PF_FSTRANS) && (delalloc || unwritten))
1083 goto out_fail;
1079 1084
1080 /* Is this page beyond the end of the file? */ 1085 /* Is this page beyond the end of the file? */
1081 offset = i_size_read(inode); 1086 offset = i_size_read(inode);
@@ -1084,50 +1089,33 @@ xfs_page_state_convert(
1084 if (page->index >= end_index) { 1089 if (page->index >= end_index) {
1085 if ((page->index >= end_index + 1) || 1090 if ((page->index >= end_index + 1) ||
1086 !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { 1091 !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
1087 if (startio) 1092 unlock_page(page);
1088 unlock_page(page);
1089 return 0; 1093 return 0;
1090 } 1094 }
1091 } 1095 }
1092 1096
1093 /*
1094 * page_dirty is initially a count of buffers on the page before
1095 * EOF and is decremented as we move each into a cleanable state.
1096 *
1097 * Derivation:
1098 *
1099 * End offset is the highest offset that this page should represent.
1100 * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
1101 * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
1102 * hence give us the correct page_dirty count. On any other page,
1103 * it will be zero and in that case we need page_dirty to be the
1104 * count of buffers on the page.
1105 */
1106 end_offset = min_t(unsigned long long, 1097 end_offset = min_t(unsigned long long,
1107 (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset); 1098 (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
1099 offset);
1108 len = 1 << inode->i_blkbits; 1100 len = 1 << inode->i_blkbits;
1109 p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
1110 PAGE_CACHE_SIZE);
1111 p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
1112 page_dirty = p_offset / len;
1113 1101
1114 bh = head = page_buffers(page); 1102 bh = head = page_buffers(page);
1115 offset = page_offset(page); 1103 offset = page_offset(page);
1116 flags = BMAPI_READ; 1104 flags = BMAPI_READ;
1117 type = IO_NEW; 1105 type = IO_NEW;
1118 1106
1119 /* TODO: cleanup count and page_dirty */
1120
1121 do { 1107 do {
1122 if (offset >= end_offset) 1108 if (offset >= end_offset)
1123 break; 1109 break;
1124 if (!buffer_uptodate(bh)) 1110 if (!buffer_uptodate(bh))
1125 uptodate = 0; 1111 uptodate = 0;
1126 if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) { 1112
1127 /* 1113 /*
1128 * the iomap is actually still valid, but the ioend 1114 * A hole may still be marked uptodate because discard_buffer
1129 * isn't. shouldn't happen too often. 1115 * leaves the flag set.
1130 */ 1116 */
1117 if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
1118 ASSERT(!buffer_dirty(bh));
1131 imap_valid = 0; 1119 imap_valid = 0;
1132 continue; 1120 continue;
1133 } 1121 }
@@ -1135,19 +1123,7 @@ xfs_page_state_convert(
1135 if (imap_valid) 1123 if (imap_valid)
1136 imap_valid = xfs_imap_valid(inode, &imap, offset); 1124 imap_valid = xfs_imap_valid(inode, &imap, offset);
1137 1125
1138 /* 1126 if (buffer_unwritten(bh) || buffer_delay(bh)) {
1139 * First case, map an unwritten extent and prepare for
1140 * extent state conversion transaction on completion.
1141 *
1142 * Second case, allocate space for a delalloc buffer.
1143 * We can return EAGAIN here in the release page case.
1144 *
1145 * Third case, an unmapped buffer was found, and we are
1146 * in a path where we need to write the whole page out.
1147 */
1148 if (buffer_unwritten(bh) || buffer_delay(bh) ||
1149 ((buffer_uptodate(bh) || PageUptodate(page)) &&
1150 !buffer_mapped(bh) && (unmapped || startio))) {
1151 int new_ioend = 0; 1127 int new_ioend = 0;
1152 1128
1153 /* 1129 /*
@@ -1161,15 +1137,16 @@ xfs_page_state_convert(
1161 flags = BMAPI_WRITE | BMAPI_IGNSTATE; 1137 flags = BMAPI_WRITE | BMAPI_IGNSTATE;
1162 } else if (buffer_delay(bh)) { 1138 } else if (buffer_delay(bh)) {
1163 type = IO_DELAY; 1139 type = IO_DELAY;
1164 flags = BMAPI_ALLOCATE | trylock; 1140 flags = BMAPI_ALLOCATE;
1165 } else { 1141
1166 type = IO_NEW; 1142 if (wbc->sync_mode == WB_SYNC_NONE &&
1167 flags = BMAPI_WRITE | BMAPI_MMAP; 1143 wbc->nonblocking)
1144 flags |= BMAPI_TRYLOCK;
1168 } 1145 }
1169 1146
1170 if (!imap_valid) { 1147 if (!imap_valid) {
1171 /* 1148 /*
1172 * if we didn't have a valid mapping then we 1149 * If we didn't have a valid mapping then we
1173 * need to ensure that we put the new mapping 1150 * need to ensure that we put the new mapping
1174 * in a new ioend structure. This needs to be 1151 * in a new ioend structure. This needs to be
1175 * done to ensure that the ioends correctly 1152 * done to ensure that the ioends correctly
@@ -1177,14 +1154,7 @@ xfs_page_state_convert(
1177 * for unwritten extent conversion. 1154 * for unwritten extent conversion.
1178 */ 1155 */
1179 new_ioend = 1; 1156 new_ioend = 1;
1180 if (type == IO_NEW) { 1157 err = xfs_map_blocks(inode, offset, len,
1181 size = xfs_probe_cluster(inode,
1182 page, bh, head, 0);
1183 } else {
1184 size = len;
1185 }
1186
1187 err = xfs_map_blocks(inode, offset, size,
1188 &imap, flags); 1158 &imap, flags);
1189 if (err) 1159 if (err)
1190 goto error; 1160 goto error;
@@ -1193,19 +1163,11 @@ xfs_page_state_convert(
1193 } 1163 }
1194 if (imap_valid) { 1164 if (imap_valid) {
1195 xfs_map_at_offset(inode, bh, &imap, offset); 1165 xfs_map_at_offset(inode, bh, &imap, offset);
1196 if (startio) { 1166 xfs_add_to_ioend(inode, bh, offset, type,
1197 xfs_add_to_ioend(inode, bh, offset, 1167 &ioend, new_ioend);
1198 type, &ioend,
1199 new_ioend);
1200 } else {
1201 set_buffer_dirty(bh);
1202 unlock_buffer(bh);
1203 mark_buffer_dirty(bh);
1204 }
1205 page_dirty--;
1206 count++; 1168 count++;
1207 } 1169 }
1208 } else if (buffer_uptodate(bh) && startio) { 1170 } else if (buffer_uptodate(bh)) {
1209 /* 1171 /*
1210 * we got here because the buffer is already mapped. 1172 * we got here because the buffer is already mapped.
1211 * That means it must already have extents allocated 1173 * That means it must already have extents allocated
@@ -1213,8 +1175,7 @@ xfs_page_state_convert(
1213 */ 1175 */
1214 if (!imap_valid || flags != BMAPI_READ) { 1176 if (!imap_valid || flags != BMAPI_READ) {
1215 flags = BMAPI_READ; 1177 flags = BMAPI_READ;
1216 size = xfs_probe_cluster(inode, page, bh, 1178 size = xfs_probe_cluster(inode, page, bh, head);
1217 head, 1);
1218 err = xfs_map_blocks(inode, offset, size, 1179 err = xfs_map_blocks(inode, offset, size,
1219 &imap, flags); 1180 &imap, flags);
1220 if (err) 1181 if (err)
@@ -1233,18 +1194,16 @@ xfs_page_state_convert(
1233 */ 1194 */
1234 type = IO_NEW; 1195 type = IO_NEW;
1235 if (trylock_buffer(bh)) { 1196 if (trylock_buffer(bh)) {
1236 ASSERT(buffer_mapped(bh));
1237 if (imap_valid) 1197 if (imap_valid)
1238 all_bh = 1; 1198 all_bh = 1;
1239 xfs_add_to_ioend(inode, bh, offset, type, 1199 xfs_add_to_ioend(inode, bh, offset, type,
1240 &ioend, !imap_valid); 1200 &ioend, !imap_valid);
1241 page_dirty--;
1242 count++; 1201 count++;
1243 } else { 1202 } else {
1244 imap_valid = 0; 1203 imap_valid = 0;
1245 } 1204 }
1246 } else if ((buffer_uptodate(bh) || PageUptodate(page)) && 1205 } else if (PageUptodate(page)) {
1247 (unmapped || startio)) { 1206 ASSERT(buffer_mapped(bh));
1248 imap_valid = 0; 1207 imap_valid = 0;
1249 } 1208 }
1250 1209
@@ -1256,8 +1215,7 @@ xfs_page_state_convert(
1256 if (uptodate && bh == head) 1215 if (uptodate && bh == head)
1257 SetPageUptodate(page); 1216 SetPageUptodate(page);
1258 1217
1259 if (startio) 1218 xfs_start_page_writeback(page, 1, count);
1260 xfs_start_page_writeback(page, 1, count);
1261 1219
1262 if (ioend && imap_valid) { 1220 if (ioend && imap_valid) {
1263 xfs_off_t end_index; 1221 xfs_off_t end_index;
@@ -1275,131 +1233,27 @@ xfs_page_state_convert(
1275 end_index = last_index; 1233 end_index = last_index;
1276 1234
1277 xfs_cluster_write(inode, page->index + 1, &imap, &ioend, 1235 xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
1278 wbc, startio, all_bh, end_index); 1236 wbc, all_bh, end_index);
1279 } 1237 }
1280 1238
1281 if (iohead) 1239 if (iohead)
1282 xfs_submit_ioend(wbc, iohead); 1240 xfs_submit_ioend(wbc, iohead);
1283 1241
1284 return page_dirty; 1242 return 0;
1285 1243
1286error: 1244error:
1287 if (iohead) 1245 if (iohead)
1288 xfs_cancel_ioend(iohead); 1246 xfs_cancel_ioend(iohead);
1289 1247
1290 /* 1248 xfs_aops_discard_page(page);
1291 * If it's delalloc and we have nowhere to put it, 1249 ClearPageUptodate(page);
1292 * throw it away, unless the lower layers told 1250 unlock_page(page);
1293 * us to try again.
1294 */
1295 if (err != -EAGAIN) {
1296 if (!unmapped)
1297 xfs_aops_discard_page(page);
1298 ClearPageUptodate(page);
1299 }
1300 return err; 1251 return err;
1301}
1302
1303/*
1304 * writepage: Called from one of two places:
1305 *
1306 * 1. we are flushing a delalloc buffer head.
1307 *
1308 * 2. we are writing out a dirty page. Typically the page dirty
1309 * state is cleared before we get here. In this case is it
1310 * conceivable we have no buffer heads.
1311 *
1312 * For delalloc space on the page we need to allocate space and
1313 * flush it. For unmapped buffer heads on the page we should
1314 * allocate space if the page is uptodate. For any other dirty
1315 * buffer heads on the page we should flush them.
1316 *
1317 * If we detect that a transaction would be required to flush
1318 * the page, we have to check the process flags first, if we
1319 * are already in a transaction or disk I/O during allocations
1320 * is off, we need to fail the writepage and redirty the page.
1321 */
1322
1323STATIC int
1324xfs_vm_writepage(
1325 struct page *page,
1326 struct writeback_control *wbc)
1327{
1328 int error;
1329 int need_trans;
1330 int delalloc, unmapped, unwritten;
1331 struct inode *inode = page->mapping->host;
1332
1333 trace_xfs_writepage(inode, page, 0);
1334
1335 /*
1336 * Refuse to write the page out if we are called from reclaim context.
1337 *
1338 * This is primarily to avoid stack overflows when called from deep
1339 * used stacks in random callers for direct reclaim, but disabling
1340 * reclaim for kswap is a nice side-effect as kswapd causes rather
1341 * suboptimal I/O patters, too.
1342 *
1343 * This should really be done by the core VM, but until that happens
1344 * filesystems like XFS, btrfs and ext4 have to take care of this
1345 * by themselves.
1346 */
1347 if (current->flags & PF_MEMALLOC)
1348 goto out_fail;
1349
1350 /*
1351 * We need a transaction if:
1352 * 1. There are delalloc buffers on the page
1353 * 2. The page is uptodate and we have unmapped buffers
1354 * 3. The page is uptodate and we have no buffers
1355 * 4. There are unwritten buffers on the page
1356 */
1357
1358 if (!page_has_buffers(page)) {
1359 unmapped = 1;
1360 need_trans = 1;
1361 } else {
1362 xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
1363 if (!PageUptodate(page))
1364 unmapped = 0;
1365 need_trans = delalloc + unmapped + unwritten;
1366 }
1367
1368 /*
1369 * If we need a transaction and the process flags say
1370 * we are already in a transaction, or no IO is allowed
1371 * then mark the page dirty again and leave the page
1372 * as is.
1373 */
1374 if (current_test_flags(PF_FSTRANS) && need_trans)
1375 goto out_fail;
1376
1377 /*
1378 * Delay hooking up buffer heads until we have
1379 * made our go/no-go decision.
1380 */
1381 if (!page_has_buffers(page))
1382 create_empty_buffers(page, 1 << inode->i_blkbits, 0);
1383
1384 /*
1385 * Convert delayed allocate, unwritten or unmapped space
1386 * to real space and flush out to disk.
1387 */
1388 error = xfs_page_state_convert(inode, page, wbc, 1, unmapped);
1389 if (error == -EAGAIN)
1390 goto out_fail;
1391 if (unlikely(error < 0))
1392 goto out_unlock;
1393
1394 return 0;
1395 1252
1396out_fail: 1253out_fail:
1397 redirty_page_for_writepage(wbc, page); 1254 redirty_page_for_writepage(wbc, page);
1398 unlock_page(page); 1255 unlock_page(page);
1399 return 0; 1256 return 0;
1400out_unlock:
1401 unlock_page(page);
1402 return error;
1403} 1257}
1404 1258
1405STATIC int 1259STATIC int
@@ -1413,65 +1267,27 @@ xfs_vm_writepages(
1413 1267
1414/* 1268/*
1415 * Called to move a page into cleanable state - and from there 1269 * Called to move a page into cleanable state - and from there
1416 * to be released. Possibly the page is already clean. We always 1270 * to be released. The page should already be clean. We always
1417 * have buffer heads in this call. 1271 * have buffer heads in this call.
1418 * 1272 *
1419 * Returns 0 if the page is ok to release, 1 otherwise. 1273 * Returns 1 if the page is ok to release, 0 otherwise.
1420 *
1421 * Possible scenarios are:
1422 *
1423 * 1. We are being called to release a page which has been written
1424 * to via regular I/O. buffer heads will be dirty and possibly
1425 * delalloc. If no delalloc buffer heads in this case then we
1426 * can just return zero.
1427 *
1428 * 2. We are called to release a page which has been written via
1429 * mmap, all we need to do is ensure there is no delalloc
1430 * state in the buffer heads, if not we can let the caller
1431 * free them and we should come back later via writepage.
1432 */ 1274 */
1433STATIC int 1275STATIC int
1434xfs_vm_releasepage( 1276xfs_vm_releasepage(
1435 struct page *page, 1277 struct page *page,
1436 gfp_t gfp_mask) 1278 gfp_t gfp_mask)
1437{ 1279{
1438 struct inode *inode = page->mapping->host; 1280 int delalloc, unwritten;
1439 int dirty, delalloc, unmapped, unwritten;
1440 struct writeback_control wbc = {
1441 .sync_mode = WB_SYNC_ALL,
1442 .nr_to_write = 1,
1443 };
1444 1281
1445 trace_xfs_releasepage(inode, page, 0); 1282 trace_xfs_releasepage(page->mapping->host, page, 0);
1446 1283
1447 if (!page_has_buffers(page)) 1284 xfs_count_page_state(page, &delalloc, &unwritten);
1448 return 0;
1449
1450 xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
1451 if (!delalloc && !unwritten)
1452 goto free_buffers;
1453 1285
1454 if (!(gfp_mask & __GFP_FS)) 1286 if (WARN_ON(delalloc))
1455 return 0; 1287 return 0;
1456 1288 if (WARN_ON(unwritten))
1457 /* If we are already inside a transaction or the thread cannot
1458 * do I/O, we cannot release this page.
1459 */
1460 if (current_test_flags(PF_FSTRANS))
1461 return 0; 1289 return 0;
1462 1290
1463 /*
1464 * Convert delalloc space to real space, do not flush the
1465 * data out to disk, that will be done by the caller.
1466 * Never need to allocate space here - we will always
1467 * come back to writepage in that case.
1468 */
1469 dirty = xfs_page_state_convert(inode, page, &wbc, 0, 0);
1470 if (dirty == 0 && !unwritten)
1471 goto free_buffers;
1472 return 0;
1473
1474free_buffers:
1475 return try_to_free_buffers(page); 1291 return try_to_free_buffers(page);
1476} 1292}
1477 1293
@@ -1481,9 +1297,9 @@ __xfs_get_blocks(
1481 sector_t iblock, 1297 sector_t iblock,
1482 struct buffer_head *bh_result, 1298 struct buffer_head *bh_result,
1483 int create, 1299 int create,
1484 int direct, 1300 int direct)
1485 bmapi_flags_t flags)
1486{ 1301{
1302 int flags = create ? BMAPI_WRITE : BMAPI_READ;
1487 struct xfs_bmbt_irec imap; 1303 struct xfs_bmbt_irec imap;
1488 xfs_off_t offset; 1304 xfs_off_t offset;
1489 ssize_t size; 1305 ssize_t size;
@@ -1498,8 +1314,11 @@ __xfs_get_blocks(
1498 if (!create && direct && offset >= i_size_read(inode)) 1314 if (!create && direct && offset >= i_size_read(inode))
1499 return 0; 1315 return 0;
1500 1316
1501 error = xfs_iomap(XFS_I(inode), offset, size, 1317 if (direct && create)
1502 create ? flags : BMAPI_READ, &imap, &nimap, &new); 1318 flags |= BMAPI_DIRECT;
1319
1320 error = xfs_iomap(XFS_I(inode), offset, size, flags, &imap, &nimap,
1321 &new);
1503 if (error) 1322 if (error)
1504 return -error; 1323 return -error;
1505 if (nimap == 0) 1324 if (nimap == 0)
@@ -1579,8 +1398,7 @@ xfs_get_blocks(
1579 struct buffer_head *bh_result, 1398 struct buffer_head *bh_result,
1580 int create) 1399 int create)
1581{ 1400{
1582 return __xfs_get_blocks(inode, iblock, 1401 return __xfs_get_blocks(inode, iblock, bh_result, create, 0);
1583 bh_result, create, 0, BMAPI_WRITE);
1584} 1402}
1585 1403
1586STATIC int 1404STATIC int
@@ -1590,61 +1408,59 @@ xfs_get_blocks_direct(
1590 struct buffer_head *bh_result, 1408 struct buffer_head *bh_result,
1591 int create) 1409 int create)
1592{ 1410{
1593 return __xfs_get_blocks(inode, iblock, 1411 return __xfs_get_blocks(inode, iblock, bh_result, create, 1);
1594 bh_result, create, 1, BMAPI_WRITE|BMAPI_DIRECT);
1595} 1412}
1596 1413
1414/*
1415 * Complete a direct I/O write request.
1416 *
1417 * If the private argument is non-NULL __xfs_get_blocks signals us that we
1418 * need to issue a transaction to convert the range from unwritten to written
1419 * extents. In case this is regular synchronous I/O we just call xfs_end_io
1420 * to do this and we are done. But in case this was a successful AIO
1421 * request this handler is called from interrupt context, from which we
1422 * can't start transactions. In that case offload the I/O completion to
1423 * the workqueues we also use for buffered I/O completion.
1424 */
1597STATIC void 1425STATIC void
1598xfs_end_io_direct( 1426xfs_end_io_direct_write(
1599 struct kiocb *iocb, 1427 struct kiocb *iocb,
1600 loff_t offset, 1428 loff_t offset,
1601 ssize_t size, 1429 ssize_t size,
1602 void *private) 1430 void *private,
1431 int ret,
1432 bool is_async)
1603{ 1433{
1604 xfs_ioend_t *ioend = iocb->private; 1434 struct xfs_ioend *ioend = iocb->private;
1605 1435
1606 /* 1436 /*
1607 * Non-NULL private data means we need to issue a transaction to 1437 * blockdev_direct_IO can return an error even after the I/O
1608 * convert a range from unwritten to written extents. This needs 1438 * completion handler was called. Thus we need to protect
1609 * to happen from process context but aio+dio I/O completion 1439 * against double-freeing.
1610 * happens from irq context so we need to defer it to a workqueue.
1611 * This is not necessary for synchronous direct I/O, but we do
1612 * it anyway to keep the code uniform and simpler.
1613 *
1614 * Well, if only it were that simple. Because synchronous direct I/O
1615 * requires extent conversion to occur *before* we return to userspace,
1616 * we have to wait for extent conversion to complete. Look at the
1617 * iocb that has been passed to us to determine if this is AIO or
1618 * not. If it is synchronous, tell xfs_finish_ioend() to kick the
1619 * workqueue and wait for it to complete.
1620 *
1621 * The core direct I/O code might be changed to always call the
1622 * completion handler in the future, in which case all this can
1623 * go away.
1624 */ 1440 */
1441 iocb->private = NULL;
1442
1625 ioend->io_offset = offset; 1443 ioend->io_offset = offset;
1626 ioend->io_size = size; 1444 ioend->io_size = size;
1627 if (ioend->io_type == IO_READ) { 1445 if (private && size > 0)
1628 xfs_finish_ioend(ioend, 0); 1446 ioend->io_type = IO_UNWRITTEN;
1629 } else if (private && size > 0) { 1447
1630 xfs_finish_ioend(ioend, is_sync_kiocb(iocb)); 1448 if (is_async) {
1631 } else {
1632 /* 1449 /*
1633 * A direct I/O write ioend starts its life in unwritten 1450 * If we are converting an unwritten extent we need to delay
1634 * state in case they map an unwritten extent. This write 1451 * the AIO completion until after the unwritten extent
1635 * didn't map an unwritten extent so switch its completion 1452 * conversion has completed, otherwise do it ASAP.
1636 * handler.
1637 */ 1453 */
1638 ioend->io_type = IO_NEW; 1454 if (ioend->io_type == IO_UNWRITTEN) {
1639 xfs_finish_ioend(ioend, 0); 1455 ioend->io_iocb = iocb;
1456 ioend->io_result = ret;
1457 } else {
1458 aio_complete(iocb, ret, 0);
1459 }
1460 xfs_finish_ioend(ioend);
1461 } else {
1462 xfs_finish_ioend_sync(ioend);
1640 } 1463 }
1641
1642 /*
1643 * blockdev_direct_IO can return an error even after the I/O
1644 * completion handler was called. Thus we need to protect
1645 * against double-freeing.
1646 */
1647 iocb->private = NULL;
1648} 1464}
1649 1465
1650STATIC ssize_t 1466STATIC ssize_t
@@ -1655,26 +1471,45 @@ xfs_vm_direct_IO(
1655 loff_t offset, 1471 loff_t offset,
1656 unsigned long nr_segs) 1472 unsigned long nr_segs)
1657{ 1473{
1658 struct file *file = iocb->ki_filp; 1474 struct inode *inode = iocb->ki_filp->f_mapping->host;
1659 struct inode *inode = file->f_mapping->host; 1475 struct block_device *bdev = xfs_find_bdev_for_inode(inode);
1660 struct block_device *bdev; 1476 ssize_t ret;
1661 ssize_t ret;
1662 1477
1663 bdev = xfs_find_bdev_for_inode(inode); 1478 if (rw & WRITE) {
1479 iocb->private = xfs_alloc_ioend(inode, IO_NEW);
1664 1480
1665 iocb->private = xfs_alloc_ioend(inode, rw == WRITE ? 1481 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
1666 IO_UNWRITTEN : IO_READ);
1667
1668 ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov,
1669 offset, nr_segs, 1482 offset, nr_segs,
1670 xfs_get_blocks_direct, 1483 xfs_get_blocks_direct,
1671 xfs_end_io_direct); 1484 xfs_end_io_direct_write, NULL, 0);
1485 if (ret != -EIOCBQUEUED && iocb->private)
1486 xfs_destroy_ioend(iocb->private);
1487 } else {
1488 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
1489 offset, nr_segs,
1490 xfs_get_blocks_direct,
1491 NULL, NULL, 0);
1492 }
1672 1493
1673 if (unlikely(ret != -EIOCBQUEUED && iocb->private))
1674 xfs_destroy_ioend(iocb->private);
1675 return ret; 1494 return ret;
1676} 1495}
1677 1496
1497STATIC void
1498xfs_vm_write_failed(
1499 struct address_space *mapping,
1500 loff_t to)
1501{
1502 struct inode *inode = mapping->host;
1503
1504 if (to > inode->i_size) {
1505 struct iattr ia = {
1506 .ia_valid = ATTR_SIZE | ATTR_FORCE,
1507 .ia_size = inode->i_size,
1508 };
1509 xfs_setattr(XFS_I(inode), &ia, XFS_ATTR_NOLOCK);
1510 }
1511}
1512
1678STATIC int 1513STATIC int
1679xfs_vm_write_begin( 1514xfs_vm_write_begin(
1680 struct file *file, 1515 struct file *file,
@@ -1685,9 +1520,31 @@ xfs_vm_write_begin(
1685 struct page **pagep, 1520 struct page **pagep,
1686 void **fsdata) 1521 void **fsdata)
1687{ 1522{
1688 *pagep = NULL; 1523 int ret;
1689 return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 1524
1690 xfs_get_blocks); 1525 ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS,
1526 pagep, xfs_get_blocks);
1527 if (unlikely(ret))
1528 xfs_vm_write_failed(mapping, pos + len);
1529 return ret;
1530}
1531
1532STATIC int
1533xfs_vm_write_end(
1534 struct file *file,
1535 struct address_space *mapping,
1536 loff_t pos,
1537 unsigned len,
1538 unsigned copied,
1539 struct page *page,
1540 void *fsdata)
1541{
1542 int ret;
1543
1544 ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
1545 if (unlikely(ret < len))
1546 xfs_vm_write_failed(mapping, pos + len);
1547 return ret;
1691} 1548}
1692 1549
1693STATIC sector_t 1550STATIC sector_t
@@ -1698,7 +1555,7 @@ xfs_vm_bmap(
1698 struct inode *inode = (struct inode *)mapping->host; 1555 struct inode *inode = (struct inode *)mapping->host;
1699 struct xfs_inode *ip = XFS_I(inode); 1556 struct xfs_inode *ip = XFS_I(inode);
1700 1557
1701 xfs_itrace_entry(XFS_I(inode)); 1558 trace_xfs_vm_bmap(XFS_I(inode));
1702 xfs_ilock(ip, XFS_IOLOCK_SHARED); 1559 xfs_ilock(ip, XFS_IOLOCK_SHARED);
1703 xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF); 1560 xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF);
1704 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 1561 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
@@ -1732,7 +1589,7 @@ const struct address_space_operations xfs_address_space_operations = {
1732 .releasepage = xfs_vm_releasepage, 1589 .releasepage = xfs_vm_releasepage,
1733 .invalidatepage = xfs_vm_invalidatepage, 1590 .invalidatepage = xfs_vm_invalidatepage,
1734 .write_begin = xfs_vm_write_begin, 1591 .write_begin = xfs_vm_write_begin,
1735 .write_end = generic_write_end, 1592 .write_end = xfs_vm_write_end,
1736 .bmap = xfs_vm_bmap, 1593 .bmap = xfs_vm_bmap,
1737 .direct_IO = xfs_vm_direct_IO, 1594 .direct_IO = xfs_vm_direct_IO,
1738 .migratepage = buffer_migrate_page, 1595 .migratepage = buffer_migrate_page,
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 4cfc6ea87df8..c5057fb6237a 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -37,6 +37,8 @@ typedef struct xfs_ioend {
37 size_t io_size; /* size of the extent */ 37 size_t io_size; /* size of the extent */
38 xfs_off_t io_offset; /* offset in the file */ 38 xfs_off_t io_offset; /* offset in the file */
39 struct work_struct io_work; /* xfsdatad work queue */ 39 struct work_struct io_work; /* xfsdatad work queue */
40 struct kiocb *io_iocb;
41 int io_result;
40} xfs_ioend_t; 42} xfs_ioend_t;
41 43
42extern const struct address_space_operations xfs_address_space_operations; 44extern const struct address_space_operations xfs_address_space_operations;
@@ -45,6 +47,6 @@ extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
45extern void xfs_ioend_init(void); 47extern void xfs_ioend_init(void);
46extern void xfs_ioend_wait(struct xfs_inode *); 48extern void xfs_ioend_wait(struct xfs_inode *);
47 49
48extern void xfs_count_page_state(struct page *, int *, int *, int *); 50extern void xfs_count_page_state(struct page *, int *, int *);
49 51
50#endif /* __XFS_AOPS_H__ */ 52#endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 2ee3f7a60163..ea79072f5210 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -39,7 +39,6 @@
39#include "xfs_inum.h" 39#include "xfs_inum.h"
40#include "xfs_log.h" 40#include "xfs_log.h"
41#include "xfs_ag.h" 41#include "xfs_ag.h"
42#include "xfs_dmapi.h"
43#include "xfs_mount.h" 42#include "xfs_mount.h"
44#include "xfs_trace.h" 43#include "xfs_trace.h"
45 44
@@ -579,9 +578,9 @@ _xfs_buf_read(
579 XBF_READ_AHEAD | _XBF_RUN_QUEUES); 578 XBF_READ_AHEAD | _XBF_RUN_QUEUES);
580 579
581 status = xfs_buf_iorequest(bp); 580 status = xfs_buf_iorequest(bp);
582 if (!status && !(flags & XBF_ASYNC)) 581 if (status || XFS_BUF_ISERROR(bp) || (flags & XBF_ASYNC))
583 status = xfs_buf_iowait(bp); 582 return status;
584 return status; 583 return xfs_buf_iowait(bp);
585} 584}
586 585
587xfs_buf_t * 586xfs_buf_t *
@@ -897,36 +896,6 @@ xfs_buf_unlock(
897 trace_xfs_buf_unlock(bp, _RET_IP_); 896 trace_xfs_buf_unlock(bp, _RET_IP_);
898} 897}
899 898
900
901/*
902 * Pinning Buffer Storage in Memory
903 * Ensure that no attempt to force a buffer to disk will succeed.
904 */
905void
906xfs_buf_pin(
907 xfs_buf_t *bp)
908{
909 trace_xfs_buf_pin(bp, _RET_IP_);
910 atomic_inc(&bp->b_pin_count);
911}
912
913void
914xfs_buf_unpin(
915 xfs_buf_t *bp)
916{
917 trace_xfs_buf_unpin(bp, _RET_IP_);
918
919 if (atomic_dec_and_test(&bp->b_pin_count))
920 wake_up_all(&bp->b_waiters);
921}
922
923int
924xfs_buf_ispin(
925 xfs_buf_t *bp)
926{
927 return atomic_read(&bp->b_pin_count);
928}
929
930STATIC void 899STATIC void
931xfs_buf_wait_unpin( 900xfs_buf_wait_unpin(
932 xfs_buf_t *bp) 901 xfs_buf_t *bp)
@@ -1018,13 +987,12 @@ xfs_bwrite(
1018{ 987{
1019 int error; 988 int error;
1020 989
1021 bp->b_strat = xfs_bdstrat_cb;
1022 bp->b_mount = mp; 990 bp->b_mount = mp;
1023 bp->b_flags |= XBF_WRITE; 991 bp->b_flags |= XBF_WRITE;
1024 bp->b_flags &= ~(XBF_ASYNC | XBF_READ); 992 bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
1025 993
1026 xfs_buf_delwri_dequeue(bp); 994 xfs_buf_delwri_dequeue(bp);
1027 xfs_buf_iostrategy(bp); 995 xfs_bdstrat_cb(bp);
1028 996
1029 error = xfs_buf_iowait(bp); 997 error = xfs_buf_iowait(bp);
1030 if (error) 998 if (error)
@@ -1040,7 +1008,6 @@ xfs_bdwrite(
1040{ 1008{
1041 trace_xfs_buf_bdwrite(bp, _RET_IP_); 1009 trace_xfs_buf_bdwrite(bp, _RET_IP_);
1042 1010
1043 bp->b_strat = xfs_bdstrat_cb;
1044 bp->b_mount = mp; 1011 bp->b_mount = mp;
1045 1012
1046 bp->b_flags &= ~XBF_READ; 1013 bp->b_flags &= ~XBF_READ;
@@ -1075,7 +1042,6 @@ xfs_bioerror(
1075 XFS_BUF_UNDONE(bp); 1042 XFS_BUF_UNDONE(bp);
1076 XFS_BUF_STALE(bp); 1043 XFS_BUF_STALE(bp);
1077 1044
1078 XFS_BUF_CLR_BDSTRAT_FUNC(bp);
1079 xfs_biodone(bp); 1045 xfs_biodone(bp);
1080 1046
1081 return EIO; 1047 return EIO;
@@ -1105,7 +1071,6 @@ xfs_bioerror_relse(
1105 XFS_BUF_DONE(bp); 1071 XFS_BUF_DONE(bp);
1106 XFS_BUF_STALE(bp); 1072 XFS_BUF_STALE(bp);
1107 XFS_BUF_CLR_IODONE_FUNC(bp); 1073 XFS_BUF_CLR_IODONE_FUNC(bp);
1108 XFS_BUF_CLR_BDSTRAT_FUNC(bp);
1109 if (!(fl & XBF_ASYNC)) { 1074 if (!(fl & XBF_ASYNC)) {
1110 /* 1075 /*
1111 * Mark b_error and B_ERROR _both_. 1076 * Mark b_error and B_ERROR _both_.
@@ -1311,8 +1276,19 @@ submit_io:
1311 if (size) 1276 if (size)
1312 goto next_chunk; 1277 goto next_chunk;
1313 } else { 1278 } else {
1314 bio_put(bio); 1279 /*
1280 * if we get here, no pages were added to the bio. However,
1281 * we can't just error out here - if the pages are locked then
1282 * we have to unlock them otherwise we can hang on a later
1283 * access to the page.
1284 */
1315 xfs_buf_ioerror(bp, EIO); 1285 xfs_buf_ioerror(bp, EIO);
1286 if (bp->b_flags & _XBF_PAGE_LOCKED) {
1287 int i;
1288 for (i = 0; i < bp->b_page_count; i++)
1289 unlock_page(bp->b_pages[i]);
1290 }
1291 bio_put(bio);
1316 } 1292 }
1317} 1293}
1318 1294
@@ -1804,7 +1780,7 @@ xfs_buf_delwri_split(
1804 trace_xfs_buf_delwri_split(bp, _RET_IP_); 1780 trace_xfs_buf_delwri_split(bp, _RET_IP_);
1805 ASSERT(bp->b_flags & XBF_DELWRI); 1781 ASSERT(bp->b_flags & XBF_DELWRI);
1806 1782
1807 if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) { 1783 if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) {
1808 if (!force && 1784 if (!force &&
1809 time_before(jiffies, bp->b_queuetime + age)) { 1785 time_before(jiffies, bp->b_queuetime + age)) {
1810 xfs_buf_unlock(bp); 1786 xfs_buf_unlock(bp);
@@ -1889,7 +1865,7 @@ xfsbufd(
1889 struct xfs_buf *bp; 1865 struct xfs_buf *bp;
1890 bp = list_first_entry(&tmp, struct xfs_buf, b_list); 1866 bp = list_first_entry(&tmp, struct xfs_buf, b_list);
1891 list_del_init(&bp->b_list); 1867 list_del_init(&bp->b_list);
1892 xfs_buf_iostrategy(bp); 1868 xfs_bdstrat_cb(bp);
1893 count++; 1869 count++;
1894 } 1870 }
1895 if (count) 1871 if (count)
@@ -1936,7 +1912,7 @@ xfs_flush_buftarg(
1936 bp->b_flags &= ~XBF_ASYNC; 1912 bp->b_flags &= ~XBF_ASYNC;
1937 list_add(&bp->b_list, &wait_list); 1913 list_add(&bp->b_list, &wait_list);
1938 } 1914 }
1939 xfs_buf_iostrategy(bp); 1915 xfs_bdstrat_cb(bp);
1940 } 1916 }
1941 1917
1942 if (wait) { 1918 if (wait) {
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 5fbecefa5dfd..d072e5ff923b 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -44,57 +44,57 @@ typedef enum {
44 XBRW_ZERO = 3, /* Zero target memory */ 44 XBRW_ZERO = 3, /* Zero target memory */
45} xfs_buf_rw_t; 45} xfs_buf_rw_t;
46 46
47typedef enum { 47#define XBF_READ (1 << 0) /* buffer intended for reading from device */
48 XBF_READ = (1 << 0), /* buffer intended for reading from device */ 48#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */
49 XBF_WRITE = (1 << 1), /* buffer intended for writing to device */ 49#define XBF_MAPPED (1 << 2) /* buffer mapped (b_addr valid) */
50 XBF_MAPPED = (1 << 2), /* buffer mapped (b_addr valid) */ 50#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */
51 XBF_ASYNC = (1 << 4), /* initiator will not wait for completion */ 51#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */
52 XBF_DONE = (1 << 5), /* all pages in the buffer uptodate */ 52#define XBF_DELWRI (1 << 6) /* buffer has dirty pages */
53 XBF_DELWRI = (1 << 6), /* buffer has dirty pages */ 53#define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */
54 XBF_STALE = (1 << 7), /* buffer has been staled, do not find it */ 54#define XBF_FS_MANAGED (1 << 8) /* filesystem controls freeing memory */
55 XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */ 55#define XBF_ORDERED (1 << 11)/* use ordered writes */
56 XBF_ORDERED = (1 << 11), /* use ordered writes */ 56#define XBF_READ_AHEAD (1 << 12)/* asynchronous read-ahead */
57 XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */ 57#define XBF_LOG_BUFFER (1 << 13)/* this is a buffer used for the log */
58 XBF_LOG_BUFFER = (1 << 13), /* this is a buffer used for the log */ 58
59 59/* flags used only as arguments to access routines */
60 /* flags used only as arguments to access routines */ 60#define XBF_LOCK (1 << 14)/* lock requested */
61 XBF_LOCK = (1 << 14), /* lock requested */ 61#define XBF_TRYLOCK (1 << 15)/* lock requested, but do not wait */
62 XBF_TRYLOCK = (1 << 15), /* lock requested, but do not wait */ 62#define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */
63 XBF_DONT_BLOCK = (1 << 16), /* do not block in current thread */ 63
64 64/* flags used only internally */
65 /* flags used only internally */ 65#define _XBF_PAGE_CACHE (1 << 17)/* backed by pagecache */
66 _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */ 66#define _XBF_PAGES (1 << 18)/* backed by refcounted pages */
67 _XBF_PAGES = (1 << 18), /* backed by refcounted pages */ 67#define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */
68 _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ 68#define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */
69 _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */ 69
70 70/*
71 /* 71 * Special flag for supporting metadata blocks smaller than a FSB.
72 * Special flag for supporting metadata blocks smaller than a FSB. 72 *
73 * 73 * In this case we can have multiple xfs_buf_t on a single page and
74 * In this case we can have multiple xfs_buf_t on a single page and 74 * need to lock out concurrent xfs_buf_t readers as they only
75 * need to lock out concurrent xfs_buf_t readers as they only 75 * serialise access to the buffer.
76 * serialise access to the buffer. 76 *
77 * 77 * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation
78 * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation 78 * between reads of the page. Hence we can have one thread read the
79 * between reads of the page. Hence we can have one thread read the 79 * page and modify it, but then race with another thread that thinks
80 * page and modify it, but then race with another thread that thinks 80 * the page is not up-to-date and hence reads it again.
81 * the page is not up-to-date and hence reads it again. 81 *
82 * 82 * The result is that the first modification to the page is lost.
83 * The result is that the first modification to the page is lost. 83 * This sort of AGF/AGI reading race can happen when unlinking inodes
84 * This sort of AGF/AGI reading race can happen when unlinking inodes 84 * that require truncation and results in the AGI unlinked list
85 * that require truncation and results in the AGI unlinked list 85 * modifications being lost.
86 * modifications being lost. 86 */
87 */ 87#define _XBF_PAGE_LOCKED (1 << 22)
88 _XBF_PAGE_LOCKED = (1 << 22), 88
89 89/*
90 /* 90 * If we try a barrier write, but it fails we have to communicate
91 * If we try a barrier write, but it fails we have to communicate 91 * this to the upper layers. Unfortunately b_error gets overwritten
92 * this to the upper layers. Unfortunately b_error gets overwritten 92 * when the buffer is re-issued so we have to add another flag to
93 * when the buffer is re-issued so we have to add another flag to 93 * keep this information.
94 * keep this information. 94 */
95 */ 95#define _XFS_BARRIER_FAILED (1 << 23)
96 _XFS_BARRIER_FAILED = (1 << 23), 96
97} xfs_buf_flags_t; 97typedef unsigned int xfs_buf_flags_t;
98 98
99#define XFS_BUF_FLAGS \ 99#define XFS_BUF_FLAGS \
100 { XBF_READ, "READ" }, \ 100 { XBF_READ, "READ" }, \
@@ -187,7 +187,6 @@ typedef struct xfs_buf {
187 atomic_t b_io_remaining; /* #outstanding I/O requests */ 187 atomic_t b_io_remaining; /* #outstanding I/O requests */
188 xfs_buf_iodone_t b_iodone; /* I/O completion function */ 188 xfs_buf_iodone_t b_iodone; /* I/O completion function */
189 xfs_buf_relse_t b_relse; /* releasing function */ 189 xfs_buf_relse_t b_relse; /* releasing function */
190 xfs_buf_bdstrat_t b_strat; /* pre-write function */
191 struct completion b_iowait; /* queue for I/O waiters */ 190 struct completion b_iowait; /* queue for I/O waiters */
192 void *b_fspriv; 191 void *b_fspriv;
193 void *b_fspriv2; 192 void *b_fspriv2;
@@ -245,11 +244,6 @@ extern int xfs_buf_iowait(xfs_buf_t *);
245extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, 244extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
246 xfs_buf_rw_t); 245 xfs_buf_rw_t);
247 246
248static inline int xfs_buf_iostrategy(xfs_buf_t *bp)
249{
250 return bp->b_strat ? bp->b_strat(bp) : xfs_buf_iorequest(bp);
251}
252
253static inline int xfs_buf_geterror(xfs_buf_t *bp) 247static inline int xfs_buf_geterror(xfs_buf_t *bp)
254{ 248{
255 return bp ? bp->b_error : ENOMEM; 249 return bp ? bp->b_error : ENOMEM;
@@ -258,11 +252,6 @@ static inline int xfs_buf_geterror(xfs_buf_t *bp)
258/* Buffer Utility Routines */ 252/* Buffer Utility Routines */
259extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); 253extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
260 254
261/* Pinning Buffer Storage in Memory */
262extern void xfs_buf_pin(xfs_buf_t *);
263extern void xfs_buf_unpin(xfs_buf_t *);
264extern int xfs_buf_ispin(xfs_buf_t *);
265
266/* Delayed Write Buffer Routines */ 255/* Delayed Write Buffer Routines */
267extern void xfs_buf_delwri_dequeue(xfs_buf_t *); 256extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
268extern void xfs_buf_delwri_promote(xfs_buf_t *); 257extern void xfs_buf_delwri_promote(xfs_buf_t *);
@@ -326,8 +315,6 @@ extern void xfs_buf_terminate(void);
326#define XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone) 315#define XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone)
327#define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func)) 316#define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func))
328#define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL) 317#define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL)
329#define XFS_BUF_SET_BDSTRAT_FUNC(bp, func) ((bp)->b_strat = (func))
330#define XFS_BUF_CLR_BDSTRAT_FUNC(bp) ((bp)->b_strat = NULL)
331 318
332#define XFS_BUF_FSPRIVATE(bp, type) ((type)(bp)->b_fspriv) 319#define XFS_BUF_FSPRIVATE(bp, type) ((type)(bp)->b_fspriv)
333#define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val)) 320#define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val))
@@ -351,7 +338,7 @@ extern void xfs_buf_terminate(void);
351#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) 338#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0)
352#define XFS_BUF_SET_REF(bp, ref) do { } while (0) 339#define XFS_BUF_SET_REF(bp, ref) do { } while (0)
353 340
354#define XFS_BUF_ISPINNED(bp) xfs_buf_ispin(bp) 341#define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count))
355 342
356#define XFS_BUF_VALUSEMA(bp) xfs_buf_lock_value(bp) 343#define XFS_BUF_VALUSEMA(bp) xfs_buf_lock_value(bp)
357#define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0) 344#define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0)
@@ -370,8 +357,6 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
370 xfs_buf_rele(bp); 357 xfs_buf_rele(bp);
371} 358}
372 359
373#define xfs_bpin(bp) xfs_buf_pin(bp)
374#define xfs_bunpin(bp) xfs_buf_unpin(bp)
375#define xfs_biodone(bp) xfs_buf_ioend(bp, 0) 360#define xfs_biodone(bp) xfs_buf_ioend(bp, 0)
376 361
377#define xfs_biomove(bp, off, len, data, rw) \ 362#define xfs_biomove(bp, off, len, data, rw) \
diff --git a/fs/xfs/linux-2.6/xfs_dmapi_priv.h b/fs/xfs/linux-2.6/xfs_dmapi_priv.h
deleted file mode 100644
index a8b0b1685eed..000000000000
--- a/fs/xfs/linux-2.6/xfs_dmapi_priv.h
+++ /dev/null
@@ -1,28 +0,0 @@
1/*
2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_DMAPI_PRIV_H__
19#define __XFS_DMAPI_PRIV_H__
20
21/*
22 * Based on IO_ISDIRECT, decide which i_ flag is set.
23 */
24#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
25 DM_FLAGS_IMUX : 0)
26#define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX)
27
28#endif /*__XFS_DMAPI_PRIV_H__*/
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index e7839ee49e43..3764d74790ec 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -23,13 +23,13 @@
23#include "xfs_sb.h" 23#include "xfs_sb.h"
24#include "xfs_ag.h" 24#include "xfs_ag.h"
25#include "xfs_dir2.h" 25#include "xfs_dir2.h"
26#include "xfs_dmapi.h"
27#include "xfs_mount.h" 26#include "xfs_mount.h"
28#include "xfs_export.h" 27#include "xfs_export.h"
29#include "xfs_vnodeops.h" 28#include "xfs_vnodeops.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_inode.h" 30#include "xfs_inode.h"
32#include "xfs_inode_item.h" 31#include "xfs_inode_item.h"
32#include "xfs_trace.h"
33 33
34/* 34/*
35 * Note that we only accept fileids which are long enough rather than allow 35 * Note that we only accept fileids which are long enough rather than allow
@@ -132,8 +132,7 @@ xfs_nfs_get_inode(
132 * fine and not an indication of a corrupted filesystem as clients can 132 * fine and not an indication of a corrupted filesystem as clients can
133 * send invalid file handles and we have to handle it gracefully.. 133 * send invalid file handles and we have to handle it gracefully..
134 */ 134 */
135 error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED, 135 error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED, 0, &ip);
136 XFS_ILOCK_SHARED, &ip);
137 if (error) { 136 if (error) {
138 /* 137 /*
139 * EINVAL means the inode cluster doesn't exist anymore. 138 * EINVAL means the inode cluster doesn't exist anymore.
@@ -148,11 +147,10 @@ xfs_nfs_get_inode(
148 } 147 }
149 148
150 if (ip->i_d.di_gen != generation) { 149 if (ip->i_d.di_gen != generation) {
151 xfs_iput_new(ip, XFS_ILOCK_SHARED); 150 IRELE(ip);
152 return ERR_PTR(-ENOENT); 151 return ERR_PTR(-ENOENT);
153 } 152 }
154 153
155 xfs_iunlock(ip, XFS_ILOCK_SHARED);
156 return VFS_I(ip); 154 return VFS_I(ip);
157} 155}
158 156
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 257a56b127cf..ba8ad422a165 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -22,23 +22,15 @@
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_sb.h" 23#include "xfs_sb.h"
24#include "xfs_ag.h" 24#include "xfs_ag.h"
25#include "xfs_dir2.h"
26#include "xfs_trans.h" 25#include "xfs_trans.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 26#include "xfs_mount.h"
29#include "xfs_bmap_btree.h" 27#include "xfs_bmap_btree.h"
30#include "xfs_alloc_btree.h"
31#include "xfs_ialloc_btree.h"
32#include "xfs_alloc.h" 28#include "xfs_alloc.h"
33#include "xfs_btree.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dir2_sf.h"
36#include "xfs_dinode.h" 29#include "xfs_dinode.h"
37#include "xfs_inode.h" 30#include "xfs_inode.h"
38#include "xfs_inode_item.h" 31#include "xfs_inode_item.h"
39#include "xfs_bmap.h" 32#include "xfs_bmap.h"
40#include "xfs_error.h" 33#include "xfs_error.h"
41#include "xfs_rw.h"
42#include "xfs_vnodeops.h" 34#include "xfs_vnodeops.h"
43#include "xfs_da_btree.h" 35#include "xfs_da_btree.h"
44#include "xfs_ioctl.h" 36#include "xfs_ioctl.h"
@@ -108,7 +100,7 @@ xfs_file_fsync(
108 int error = 0; 100 int error = 0;
109 int log_flushed = 0; 101 int log_flushed = 0;
110 102
111 xfs_itrace_entry(ip); 103 trace_xfs_file_fsync(ip);
112 104
113 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 105 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
114 return -XFS_ERROR(EIO); 106 return -XFS_ERROR(EIO);
@@ -166,8 +158,7 @@ xfs_file_fsync(
166 * transaction. So we play it safe and fire off the 158 * transaction. So we play it safe and fire off the
167 * transaction anyway. 159 * transaction anyway.
168 */ 160 */
169 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 161 xfs_trans_ijoin(tp, ip);
170 xfs_trans_ihold(tp, ip);
171 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 162 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
172 xfs_trans_set_sync(tp); 163 xfs_trans_set_sync(tp);
173 error = _xfs_trans_commit(tp, 0, &log_flushed); 164 error = _xfs_trans_commit(tp, 0, &log_flushed);
@@ -275,20 +266,6 @@ xfs_file_aio_read(
275 mutex_lock(&inode->i_mutex); 266 mutex_lock(&inode->i_mutex);
276 xfs_ilock(ip, XFS_IOLOCK_SHARED); 267 xfs_ilock(ip, XFS_IOLOCK_SHARED);
277 268
278 if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
279 int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
280 int iolock = XFS_IOLOCK_SHARED;
281
282 ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, iocb->ki_pos, size,
283 dmflags, &iolock);
284 if (ret) {
285 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
286 if (unlikely(ioflags & IO_ISDIRECT))
287 mutex_unlock(&inode->i_mutex);
288 return ret;
289 }
290 }
291
292 if (unlikely(ioflags & IO_ISDIRECT)) { 269 if (unlikely(ioflags & IO_ISDIRECT)) {
293 if (inode->i_mapping->nrpages) { 270 if (inode->i_mapping->nrpages) {
294 ret = -xfs_flushinval_pages(ip, 271 ret = -xfs_flushinval_pages(ip,
@@ -321,7 +298,6 @@ xfs_file_splice_read(
321 unsigned int flags) 298 unsigned int flags)
322{ 299{
323 struct xfs_inode *ip = XFS_I(infilp->f_mapping->host); 300 struct xfs_inode *ip = XFS_I(infilp->f_mapping->host);
324 struct xfs_mount *mp = ip->i_mount;
325 int ioflags = 0; 301 int ioflags = 0;
326 ssize_t ret; 302 ssize_t ret;
327 303
@@ -335,18 +311,6 @@ xfs_file_splice_read(
335 311
336 xfs_ilock(ip, XFS_IOLOCK_SHARED); 312 xfs_ilock(ip, XFS_IOLOCK_SHARED);
337 313
338 if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
339 int iolock = XFS_IOLOCK_SHARED;
340 int error;
341
342 error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count,
343 FILP_DELAY_FLAG(infilp), &iolock);
344 if (error) {
345 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
346 return -error;
347 }
348 }
349
350 trace_xfs_file_splice_read(ip, count, *ppos, ioflags); 314 trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
351 315
352 ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); 316 ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
@@ -367,7 +331,6 @@ xfs_file_splice_write(
367{ 331{
368 struct inode *inode = outfilp->f_mapping->host; 332 struct inode *inode = outfilp->f_mapping->host;
369 struct xfs_inode *ip = XFS_I(inode); 333 struct xfs_inode *ip = XFS_I(inode);
370 struct xfs_mount *mp = ip->i_mount;
371 xfs_fsize_t isize, new_size; 334 xfs_fsize_t isize, new_size;
372 int ioflags = 0; 335 int ioflags = 0;
373 ssize_t ret; 336 ssize_t ret;
@@ -382,18 +345,6 @@ xfs_file_splice_write(
382 345
383 xfs_ilock(ip, XFS_IOLOCK_EXCL); 346 xfs_ilock(ip, XFS_IOLOCK_EXCL);
384 347
385 if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) {
386 int iolock = XFS_IOLOCK_EXCL;
387 int error;
388
389 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count,
390 FILP_DELAY_FLAG(outfilp), &iolock);
391 if (error) {
392 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
393 return -error;
394 }
395 }
396
397 new_size = *ppos + count; 348 new_size = *ppos + count;
398 349
399 xfs_ilock(ip, XFS_ILOCK_EXCL); 350 xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -463,7 +414,7 @@ xfs_zero_last_block(
463 last_fsb = XFS_B_TO_FSBT(mp, isize); 414 last_fsb = XFS_B_TO_FSBT(mp, isize);
464 nimaps = 1; 415 nimaps = 1;
465 error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap, 416 error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
466 &nimaps, NULL, NULL); 417 &nimaps, NULL);
467 if (error) { 418 if (error) {
468 return error; 419 return error;
469 } 420 }
@@ -558,7 +509,7 @@ xfs_zero_eof(
558 nimaps = 1; 509 nimaps = 1;
559 zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; 510 zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
560 error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb, 511 error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
561 0, NULL, 0, &imap, &nimaps, NULL, NULL); 512 0, NULL, 0, &imap, &nimaps, NULL);
562 if (error) { 513 if (error) {
563 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); 514 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
564 return error; 515 return error;
@@ -627,7 +578,6 @@ xfs_file_aio_write(
627 int ioflags = 0; 578 int ioflags = 0;
628 xfs_fsize_t isize, new_size; 579 xfs_fsize_t isize, new_size;
629 int iolock; 580 int iolock;
630 int eventsent = 0;
631 size_t ocount = 0, count; 581 size_t ocount = 0, count;
632 int need_i_mutex; 582 int need_i_mutex;
633 583
@@ -673,33 +623,6 @@ start:
673 goto out_unlock_mutex; 623 goto out_unlock_mutex;
674 } 624 }
675 625
676 if ((DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) &&
677 !(ioflags & IO_INVIS) && !eventsent)) {
678 int dmflags = FILP_DELAY_FLAG(file);
679
680 if (need_i_mutex)
681 dmflags |= DM_FLAGS_IMUX;
682
683 xfs_iunlock(ip, XFS_ILOCK_EXCL);
684 error = XFS_SEND_DATA(ip->i_mount, DM_EVENT_WRITE, ip,
685 pos, count, dmflags, &iolock);
686 if (error) {
687 goto out_unlock_internal;
688 }
689 xfs_ilock(ip, XFS_ILOCK_EXCL);
690 eventsent = 1;
691
692 /*
693 * The iolock was dropped and reacquired in XFS_SEND_DATA
694 * so we have to recheck the size when appending.
695 * We will only "goto start;" once, since having sent the
696 * event prevents another call to XFS_SEND_DATA, which is
697 * what allows the size to change in the first place.
698 */
699 if ((file->f_flags & O_APPEND) && pos != ip->i_size)
700 goto start;
701 }
702
703 if (ioflags & IO_ISDIRECT) { 626 if (ioflags & IO_ISDIRECT) {
704 xfs_buftarg_t *target = 627 xfs_buftarg_t *target =
705 XFS_IS_REALTIME_INODE(ip) ? 628 XFS_IS_REALTIME_INODE(ip) ?
@@ -830,22 +753,6 @@ write_retry:
830 xfs_iunlock(ip, XFS_ILOCK_EXCL); 753 xfs_iunlock(ip, XFS_ILOCK_EXCL);
831 } 754 }
832 755
833 if (ret == -ENOSPC &&
834 DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
835 xfs_iunlock(ip, iolock);
836 if (need_i_mutex)
837 mutex_unlock(&inode->i_mutex);
838 error = XFS_SEND_NAMESP(ip->i_mount, DM_EVENT_NOSPACE, ip,
839 DM_RIGHT_NULL, ip, DM_RIGHT_NULL, NULL, NULL,
840 0, 0, 0); /* Delay flag intentionally unused */
841 if (need_i_mutex)
842 mutex_lock(&inode->i_mutex);
843 xfs_ilock(ip, iolock);
844 if (error)
845 goto out_unlock_internal;
846 goto start;
847 }
848
849 error = -ret; 756 error = -ret;
850 if (ret <= 0) 757 if (ret <= 0)
851 goto out_unlock_internal; 758 goto out_unlock_internal;
@@ -1014,9 +921,6 @@ const struct file_operations xfs_file_operations = {
1014 .open = xfs_file_open, 921 .open = xfs_file_open,
1015 .release = xfs_file_release, 922 .release = xfs_file_release,
1016 .fsync = xfs_file_fsync, 923 .fsync = xfs_file_fsync,
1017#ifdef HAVE_FOP_OPEN_EXEC
1018 .open_exec = xfs_file_open_exec,
1019#endif
1020}; 924};
1021 925
1022const struct file_operations xfs_dir_file_operations = { 926const struct file_operations xfs_dir_file_operations = {
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index b6918d76bc7b..1f279b012f94 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -21,10 +21,6 @@
21#include "xfs_inode.h" 21#include "xfs_inode.h"
22#include "xfs_trace.h" 22#include "xfs_trace.h"
23 23
24int fs_noerr(void) { return 0; }
25int fs_nosys(void) { return ENOSYS; }
26void fs_noval(void) { return; }
27
28/* 24/*
29 * note: all filemap functions return negative error codes. These 25 * note: all filemap functions return negative error codes. These
30 * need to be inverted before returning to the xfs core functions. 26 * need to be inverted before returning to the xfs core functions.
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.h b/fs/xfs/linux-2.6/xfs_fs_subr.h
deleted file mode 100644
index 82bb19b2599e..000000000000
--- a/fs/xfs/linux-2.6/xfs_fs_subr.h
+++ /dev/null
@@ -1,25 +0,0 @@
1/*
2 * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_FS_SUBR_H__
19#define __XFS_FS_SUBR_H__
20
21extern int fs_noerr(void);
22extern int fs_nosys(void);
23extern void fs_noval(void);
24
25#endif /* __XFS_FS_SUBR_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index e59a81062830..237f5ffb2ee8 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -23,24 +23,15 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_alloc.h" 26#include "xfs_alloc.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h"
33#include "xfs_attr_sf.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_dinode.h" 29#include "xfs_dinode.h"
36#include "xfs_inode.h" 30#include "xfs_inode.h"
37#include "xfs_ioctl.h" 31#include "xfs_ioctl.h"
38#include "xfs_btree.h"
39#include "xfs_ialloc.h"
40#include "xfs_rtalloc.h" 32#include "xfs_rtalloc.h"
41#include "xfs_itable.h" 33#include "xfs_itable.h"
42#include "xfs_error.h" 34#include "xfs_error.h"
43#include "xfs_rw.h"
44#include "xfs_attr.h" 35#include "xfs_attr.h"
45#include "xfs_bmap.h" 36#include "xfs_bmap.h"
46#include "xfs_buf_item.h" 37#include "xfs_buf_item.h"
@@ -908,7 +899,7 @@ xfs_ioctl_setattr(
908 struct xfs_dquot *olddquot = NULL; 899 struct xfs_dquot *olddquot = NULL;
909 int code; 900 int code;
910 901
911 xfs_itrace_entry(ip); 902 trace_xfs_ioctl_setattr(ip);
912 903
913 if (mp->m_flags & XFS_MOUNT_RDONLY) 904 if (mp->m_flags & XFS_MOUNT_RDONLY)
914 return XFS_ERROR(EROFS); 905 return XFS_ERROR(EROFS);
@@ -1043,8 +1034,7 @@ xfs_ioctl_setattr(
1043 } 1034 }
1044 } 1035 }
1045 1036
1046 xfs_trans_ijoin(tp, ip, lock_flags); 1037 xfs_trans_ijoin(tp, ip);
1047 xfs_trans_ihold(tp, ip);
1048 1038
1049 /* 1039 /*
1050 * Change file ownership. Must be the owner or privileged. 1040 * Change file ownership. Must be the owner or privileged.
@@ -1116,16 +1106,7 @@ xfs_ioctl_setattr(
1116 xfs_qm_dqrele(udqp); 1106 xfs_qm_dqrele(udqp);
1117 xfs_qm_dqrele(gdqp); 1107 xfs_qm_dqrele(gdqp);
1118 1108
1119 if (code) 1109 return code;
1120 return code;
1121
1122 if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE)) {
1123 XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL,
1124 NULL, DM_RIGHT_NULL, NULL, NULL, 0, 0,
1125 (mask & FSX_NONBLOCK) ? DM_FLAGS_NDELAY : 0);
1126 }
1127
1128 return 0;
1129 1110
1130 error_return: 1111 error_return:
1131 xfs_qm_dqrele(udqp); 1112 xfs_qm_dqrele(udqp);
@@ -1301,7 +1282,7 @@ xfs_file_ioctl(
1301 if (filp->f_mode & FMODE_NOCMTIME) 1282 if (filp->f_mode & FMODE_NOCMTIME)
1302 ioflags |= IO_INVIS; 1283 ioflags |= IO_INVIS;
1303 1284
1304 xfs_itrace_entry(ip); 1285 trace_xfs_file_ioctl(ip);
1305 1286
1306 switch (cmd) { 1287 switch (cmd) {
1307 case XFS_IOC_ALLOCSP: 1288 case XFS_IOC_ALLOCSP:
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 52ed49e6465c..6c83f7f62dc9 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -28,12 +28,8 @@
28#include "xfs_trans.h" 28#include "xfs_trans.h"
29#include "xfs_sb.h" 29#include "xfs_sb.h"
30#include "xfs_ag.h" 30#include "xfs_ag.h"
31#include "xfs_dir2.h"
32#include "xfs_dmapi.h"
33#include "xfs_mount.h" 31#include "xfs_mount.h"
34#include "xfs_bmap_btree.h" 32#include "xfs_bmap_btree.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dir2_sf.h"
37#include "xfs_vnode.h" 33#include "xfs_vnode.h"
38#include "xfs_dinode.h" 34#include "xfs_dinode.h"
39#include "xfs_inode.h" 35#include "xfs_inode.h"
@@ -544,7 +540,7 @@ xfs_file_compat_ioctl(
544 if (filp->f_mode & FMODE_NOCMTIME) 540 if (filp->f_mode & FMODE_NOCMTIME)
545 ioflags |= IO_INVIS; 541 ioflags |= IO_INVIS;
546 542
547 xfs_itrace_entry(ip); 543 trace_xfs_file_compat_ioctl(ip);
548 544
549 switch (cmd) { 545 switch (cmd) {
550 /* No size or alignment issues on any arch */ 546 /* No size or alignment issues on any arch */
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 44f0b2de153e..68be25dcd301 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -24,21 +24,13 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_alloc.h" 27#include "xfs_alloc.h"
29#include "xfs_dmapi.h"
30#include "xfs_quota.h" 28#include "xfs_quota.h"
31#include "xfs_mount.h" 29#include "xfs_mount.h"
32#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
33#include "xfs_alloc_btree.h"
34#include "xfs_ialloc_btree.h"
35#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 31#include "xfs_dinode.h"
38#include "xfs_inode.h" 32#include "xfs_inode.h"
39#include "xfs_bmap.h" 33#include "xfs_bmap.h"
40#include "xfs_btree.h"
41#include "xfs_ialloc.h"
42#include "xfs_rtalloc.h" 34#include "xfs_rtalloc.h"
43#include "xfs_error.h" 35#include "xfs_error.h"
44#include "xfs_itable.h" 36#include "xfs_itable.h"
@@ -88,7 +80,7 @@ xfs_mark_inode_dirty_sync(
88{ 80{
89 struct inode *inode = VFS_I(ip); 81 struct inode *inode = VFS_I(ip);
90 82
91 if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR))) 83 if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
92 mark_inode_dirty_sync(inode); 84 mark_inode_dirty_sync(inode);
93} 85}
94 86
@@ -98,7 +90,7 @@ xfs_mark_inode_dirty(
98{ 90{
99 struct inode *inode = VFS_I(ip); 91 struct inode *inode = VFS_I(ip);
100 92
101 if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR))) 93 if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
102 mark_inode_dirty(inode); 94 mark_inode_dirty(inode);
103} 95}
104 96
@@ -496,7 +488,7 @@ xfs_vn_getattr(
496 struct xfs_inode *ip = XFS_I(inode); 488 struct xfs_inode *ip = XFS_I(inode);
497 struct xfs_mount *mp = ip->i_mount; 489 struct xfs_mount *mp = ip->i_mount;
498 490
499 xfs_itrace_entry(ip); 491 trace_xfs_getattr(ip);
500 492
501 if (XFS_FORCED_SHUTDOWN(mp)) 493 if (XFS_FORCED_SHUTDOWN(mp))
502 return XFS_ERROR(EIO); 494 return XFS_ERROR(EIO);
@@ -548,21 +540,6 @@ xfs_vn_setattr(
548 return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0); 540 return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0);
549} 541}
550 542
551/*
552 * block_truncate_page can return an error, but we can't propagate it
553 * at all here. Leave a complaint + stack trace in the syslog because
554 * this could be bad. If it is bad, we need to propagate the error further.
555 */
556STATIC void
557xfs_vn_truncate(
558 struct inode *inode)
559{
560 int error;
561 error = block_truncate_page(inode->i_mapping, inode->i_size,
562 xfs_get_blocks);
563 WARN_ON(error);
564}
565
566STATIC long 543STATIC long
567xfs_vn_fallocate( 544xfs_vn_fallocate(
568 struct inode *inode, 545 struct inode *inode,
@@ -702,7 +679,6 @@ xfs_vn_fiemap(
702 679
703static const struct inode_operations xfs_inode_operations = { 680static const struct inode_operations xfs_inode_operations = {
704 .check_acl = xfs_check_acl, 681 .check_acl = xfs_check_acl,
705 .truncate = xfs_vn_truncate,
706 .getattr = xfs_vn_getattr, 682 .getattr = xfs_vn_getattr,
707 .setattr = xfs_vn_setattr, 683 .setattr = xfs_vn_setattr,
708 .setxattr = generic_setxattr, 684 .setxattr = generic_setxattr,
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index facfb323a706..2fa0bd9ebc7f 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -87,7 +87,6 @@
87#include <xfs_aops.h> 87#include <xfs_aops.h>
88#include <xfs_super.h> 88#include <xfs_super.h>
89#include <xfs_globals.h> 89#include <xfs_globals.h>
90#include <xfs_fs_subr.h>
91#include <xfs_buf.h> 90#include <xfs_buf.h>
92 91
93/* 92/*
@@ -157,8 +156,6 @@
157 */ 156 */
158#define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL) 157#define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL)
159#define xfs_stack_trace() dump_stack() 158#define xfs_stack_trace() dump_stack()
160#define xfs_itruncate_data(ip, off) \
161 (-vmtruncate(VFS_I(ip), (off)))
162 159
163 160
164/* Move the kernel do_div definition off to one side */ 161/* Move the kernel do_div definition off to one side */
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 067cafbfc635..29b9d642e93d 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -16,7 +16,6 @@
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18#include "xfs.h" 18#include "xfs.h"
19#include "xfs_dmapi.h"
20#include "xfs_sb.h" 19#include "xfs_sb.h"
21#include "xfs_inum.h" 20#include "xfs_inum.h"
22#include "xfs_log.h" 21#include "xfs_log.h"
@@ -69,15 +68,15 @@ xfs_fs_set_xstate(
69 if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp)) 68 if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp))
70 return -ENOSYS; 69 return -ENOSYS;
71 70
72 if (uflags & XFS_QUOTA_UDQ_ACCT) 71 if (uflags & FS_QUOTA_UDQ_ACCT)
73 flags |= XFS_UQUOTA_ACCT; 72 flags |= XFS_UQUOTA_ACCT;
74 if (uflags & XFS_QUOTA_PDQ_ACCT) 73 if (uflags & FS_QUOTA_PDQ_ACCT)
75 flags |= XFS_PQUOTA_ACCT; 74 flags |= XFS_PQUOTA_ACCT;
76 if (uflags & XFS_QUOTA_GDQ_ACCT) 75 if (uflags & FS_QUOTA_GDQ_ACCT)
77 flags |= XFS_GQUOTA_ACCT; 76 flags |= XFS_GQUOTA_ACCT;
78 if (uflags & XFS_QUOTA_UDQ_ENFD) 77 if (uflags & FS_QUOTA_UDQ_ENFD)
79 flags |= XFS_UQUOTA_ENFD; 78 flags |= XFS_UQUOTA_ENFD;
80 if (uflags & (XFS_QUOTA_PDQ_ENFD|XFS_QUOTA_GDQ_ENFD)) 79 if (uflags & (FS_QUOTA_PDQ_ENFD|FS_QUOTA_GDQ_ENFD))
81 flags |= XFS_OQUOTA_ENFD; 80 flags |= XFS_OQUOTA_ENFD;
82 81
83 switch (op) { 82 switch (op) {
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 80938c736c27..15c35b62ff14 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -25,14 +25,11 @@
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h" 26#include "xfs_dir2.h"
27#include "xfs_alloc.h" 27#include "xfs_alloc.h"
28#include "xfs_dmapi.h"
29#include "xfs_quota.h" 28#include "xfs_quota.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 31#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 32#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h" 33#include "xfs_dinode.h"
37#include "xfs_inode.h" 34#include "xfs_inode.h"
38#include "xfs_btree.h" 35#include "xfs_btree.h"
@@ -43,7 +40,6 @@
43#include "xfs_error.h" 40#include "xfs_error.h"
44#include "xfs_itable.h" 41#include "xfs_itable.h"
45#include "xfs_fsops.h" 42#include "xfs_fsops.h"
46#include "xfs_rw.h"
47#include "xfs_attr.h" 43#include "xfs_attr.h"
48#include "xfs_buf_item.h" 44#include "xfs_buf_item.h"
49#include "xfs_utils.h" 45#include "xfs_utils.h"
@@ -94,7 +90,6 @@ mempool_t *xfs_ioend_pool;
94#define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and 90#define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and
95 * unwritten extent conversion */ 91 * unwritten extent conversion */
96#define MNTOPT_NOBARRIER "nobarrier" /* .. disable */ 92#define MNTOPT_NOBARRIER "nobarrier" /* .. disable */
97#define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */
98#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */ 93#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */
99#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */ 94#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */
100#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */ 95#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */
@@ -116,9 +111,6 @@ mempool_t *xfs_ioend_pool;
116#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */ 111#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
117#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */ 112#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */
118#define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */ 113#define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */
119#define MNTOPT_DMAPI "dmapi" /* DMI enabled (DMAPI / XDSM) */
120#define MNTOPT_XDSM "xdsm" /* DMI enabled (DMAPI / XDSM) */
121#define MNTOPT_DMI "dmi" /* DMI enabled (DMAPI / XDSM) */
122#define MNTOPT_DELAYLOG "delaylog" /* Delayed loging enabled */ 114#define MNTOPT_DELAYLOG "delaylog" /* Delayed loging enabled */
123#define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed loging disabled */ 115#define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed loging disabled */
124 116
@@ -172,15 +164,13 @@ suffix_strtoul(char *s, char **endp, unsigned int base)
172STATIC int 164STATIC int
173xfs_parseargs( 165xfs_parseargs(
174 struct xfs_mount *mp, 166 struct xfs_mount *mp,
175 char *options, 167 char *options)
176 char **mtpt)
177{ 168{
178 struct super_block *sb = mp->m_super; 169 struct super_block *sb = mp->m_super;
179 char *this_char, *value, *eov; 170 char *this_char, *value, *eov;
180 int dsunit = 0; 171 int dsunit = 0;
181 int dswidth = 0; 172 int dswidth = 0;
182 int iosize = 0; 173 int iosize = 0;
183 int dmapi_implies_ikeep = 1;
184 __uint8_t iosizelog = 0; 174 __uint8_t iosizelog = 0;
185 175
186 /* 176 /*
@@ -243,15 +233,10 @@ xfs_parseargs(
243 if (!mp->m_logname) 233 if (!mp->m_logname)
244 return ENOMEM; 234 return ENOMEM;
245 } else if (!strcmp(this_char, MNTOPT_MTPT)) { 235 } else if (!strcmp(this_char, MNTOPT_MTPT)) {
246 if (!value || !*value) { 236 cmn_err(CE_WARN,
247 cmn_err(CE_WARN, 237 "XFS: %s option not allowed on this system",
248 "XFS: %s option requires an argument", 238 this_char);
249 this_char); 239 return EINVAL;
250 return EINVAL;
251 }
252 *mtpt = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
253 if (!*mtpt)
254 return ENOMEM;
255 } else if (!strcmp(this_char, MNTOPT_RTDEV)) { 240 } else if (!strcmp(this_char, MNTOPT_RTDEV)) {
256 if (!value || !*value) { 241 if (!value || !*value) {
257 cmn_err(CE_WARN, 242 cmn_err(CE_WARN,
@@ -288,8 +273,6 @@ xfs_parseargs(
288 mp->m_flags &= ~XFS_MOUNT_GRPID; 273 mp->m_flags &= ~XFS_MOUNT_GRPID;
289 } else if (!strcmp(this_char, MNTOPT_WSYNC)) { 274 } else if (!strcmp(this_char, MNTOPT_WSYNC)) {
290 mp->m_flags |= XFS_MOUNT_WSYNC; 275 mp->m_flags |= XFS_MOUNT_WSYNC;
291 } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) {
292 mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
293 } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) { 276 } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
294 mp->m_flags |= XFS_MOUNT_NORECOVERY; 277 mp->m_flags |= XFS_MOUNT_NORECOVERY;
295 } else if (!strcmp(this_char, MNTOPT_NOALIGN)) { 278 } else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
@@ -329,7 +312,6 @@ xfs_parseargs(
329 } else if (!strcmp(this_char, MNTOPT_IKEEP)) { 312 } else if (!strcmp(this_char, MNTOPT_IKEEP)) {
330 mp->m_flags |= XFS_MOUNT_IKEEP; 313 mp->m_flags |= XFS_MOUNT_IKEEP;
331 } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) { 314 } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
332 dmapi_implies_ikeep = 0;
333 mp->m_flags &= ~XFS_MOUNT_IKEEP; 315 mp->m_flags &= ~XFS_MOUNT_IKEEP;
334 } else if (!strcmp(this_char, MNTOPT_LARGEIO)) { 316 } else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
335 mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE; 317 mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
@@ -370,12 +352,6 @@ xfs_parseargs(
370 } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) { 352 } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
371 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); 353 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
372 mp->m_qflags &= ~XFS_OQUOTA_ENFD; 354 mp->m_qflags &= ~XFS_OQUOTA_ENFD;
373 } else if (!strcmp(this_char, MNTOPT_DMAPI)) {
374 mp->m_flags |= XFS_MOUNT_DMAPI;
375 } else if (!strcmp(this_char, MNTOPT_XDSM)) {
376 mp->m_flags |= XFS_MOUNT_DMAPI;
377 } else if (!strcmp(this_char, MNTOPT_DMI)) {
378 mp->m_flags |= XFS_MOUNT_DMAPI;
379 } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { 355 } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
380 mp->m_flags |= XFS_MOUNT_DELAYLOG; 356 mp->m_flags |= XFS_MOUNT_DELAYLOG;
381 cmn_err(CE_WARN, 357 cmn_err(CE_WARN,
@@ -387,9 +363,11 @@ xfs_parseargs(
387 cmn_err(CE_WARN, 363 cmn_err(CE_WARN,
388 "XFS: ihashsize no longer used, option is deprecated."); 364 "XFS: ihashsize no longer used, option is deprecated.");
389 } else if (!strcmp(this_char, "osyncisdsync")) { 365 } else if (!strcmp(this_char, "osyncisdsync")) {
390 /* no-op, this is now the default */
391 cmn_err(CE_WARN, 366 cmn_err(CE_WARN,
392 "XFS: osyncisdsync is now the default, option is deprecated."); 367 "XFS: osyncisdsync has no effect, option is deprecated.");
368 } else if (!strcmp(this_char, "osyncisosync")) {
369 cmn_err(CE_WARN,
370 "XFS: osyncisosync has no effect, option is deprecated.");
393 } else if (!strcmp(this_char, "irixsgid")) { 371 } else if (!strcmp(this_char, "irixsgid")) {
394 cmn_err(CE_WARN, 372 cmn_err(CE_WARN,
395 "XFS: irixsgid is now a sysctl(2) variable, option is deprecated."); 373 "XFS: irixsgid is now a sysctl(2) variable, option is deprecated.");
@@ -430,12 +408,6 @@ xfs_parseargs(
430 return EINVAL; 408 return EINVAL;
431 } 409 }
432 410
433 if ((mp->m_flags & XFS_MOUNT_DMAPI) && (!*mtpt || *mtpt[0] == '\0')) {
434 printk("XFS: %s option needs the mount point option as well\n",
435 MNTOPT_DMAPI);
436 return EINVAL;
437 }
438
439 if ((dsunit && !dswidth) || (!dsunit && dswidth)) { 411 if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
440 cmn_err(CE_WARN, 412 cmn_err(CE_WARN,
441 "XFS: sunit and swidth must be specified together"); 413 "XFS: sunit and swidth must be specified together");
@@ -449,18 +421,6 @@ xfs_parseargs(
449 return EINVAL; 421 return EINVAL;
450 } 422 }
451 423
452 /*
453 * Applications using DMI filesystems often expect the
454 * inode generation number to be monotonically increasing.
455 * If we delete inode chunks we break this assumption, so
456 * keep unused inode chunks on disk for DMI filesystems
457 * until we come up with a better solution.
458 * Note that if "ikeep" or "noikeep" mount options are
459 * supplied, then they are honored.
460 */
461 if ((mp->m_flags & XFS_MOUNT_DMAPI) && dmapi_implies_ikeep)
462 mp->m_flags |= XFS_MOUNT_IKEEP;
463
464done: 424done:
465 if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) { 425 if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) {
466 /* 426 /*
@@ -539,10 +499,8 @@ xfs_showargs(
539 { XFS_MOUNT_SWALLOC, "," MNTOPT_SWALLOC }, 499 { XFS_MOUNT_SWALLOC, "," MNTOPT_SWALLOC },
540 { XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID }, 500 { XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID },
541 { XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY }, 501 { XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY },
542 { XFS_MOUNT_OSYNCISOSYNC, "," MNTOPT_OSYNCISOSYNC },
543 { XFS_MOUNT_ATTR2, "," MNTOPT_ATTR2 }, 502 { XFS_MOUNT_ATTR2, "," MNTOPT_ATTR2 },
544 { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, 503 { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM },
545 { XFS_MOUNT_DMAPI, "," MNTOPT_DMAPI },
546 { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, 504 { XFS_MOUNT_GRPID, "," MNTOPT_GRPID },
547 { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG }, 505 { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG },
548 { 0, NULL } 506 { 0, NULL }
@@ -947,7 +905,7 @@ xfs_fs_destroy_inode(
947{ 905{
948 struct xfs_inode *ip = XFS_I(inode); 906 struct xfs_inode *ip = XFS_I(inode);
949 907
950 xfs_itrace_entry(ip); 908 trace_xfs_destroy_inode(ip);
951 909
952 XFS_STATS_INC(vn_reclaim); 910 XFS_STATS_INC(vn_reclaim);
953 911
@@ -1063,10 +1021,8 @@ xfs_log_inode(
1063 * an inode in another recent transaction. So we play it safe and 1021 * an inode in another recent transaction. So we play it safe and
1064 * fire off the transaction anyway. 1022 * fire off the transaction anyway.
1065 */ 1023 */
1066 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 1024 xfs_trans_ijoin(tp, ip);
1067 xfs_trans_ihold(tp, ip);
1068 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1025 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1069 xfs_trans_set_sync(tp);
1070 error = xfs_trans_commit(tp, 0); 1026 error = xfs_trans_commit(tp, 0);
1071 xfs_ilock_demote(ip, XFS_ILOCK_EXCL); 1027 xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
1072 1028
@@ -1082,27 +1038,18 @@ xfs_fs_write_inode(
1082 struct xfs_mount *mp = ip->i_mount; 1038 struct xfs_mount *mp = ip->i_mount;
1083 int error = EAGAIN; 1039 int error = EAGAIN;
1084 1040
1085 xfs_itrace_entry(ip); 1041 trace_xfs_write_inode(ip);
1086 1042
1087 if (XFS_FORCED_SHUTDOWN(mp)) 1043 if (XFS_FORCED_SHUTDOWN(mp))
1088 return XFS_ERROR(EIO); 1044 return XFS_ERROR(EIO);
1089 1045
1090 if (wbc->sync_mode == WB_SYNC_ALL) { 1046 if (wbc->sync_mode == WB_SYNC_ALL) {
1091 /* 1047 /*
1092 * Make sure the inode has hit stable storage. By using the 1048 * Make sure the inode has made it it into the log. Instead
1093 * log and the fsync transactions we reduce the IOs we have 1049 * of forcing it all the way to stable storage using a
1094 * to do here from two (log and inode) to just the log. 1050 * synchronous transaction we let the log force inside the
1095 * 1051 * ->sync_fs call do that for thus, which reduces the number
1096 * Note: We still need to do a delwri write of the inode after 1052 * of synchronous log foces dramatically.
1097 * this to flush it to the backing buffer so that bulkstat
1098 * works properly if this is the first time the inode has been
1099 * written. Because we hold the ilock atomically over the
1100 * transaction commit and the inode flush we are guaranteed
1101 * that the inode is not pinned when it returns. If the flush
1102 * lock is already held, then the inode has already been
1103 * flushed once and we don't need to flush it again. Hence
1104 * the code will only flush the inode if it isn't already
1105 * being flushed.
1106 */ 1053 */
1107 xfs_ioend_wait(ip); 1054 xfs_ioend_wait(ip);
1108 xfs_ilock(ip, XFS_ILOCK_SHARED); 1055 xfs_ilock(ip, XFS_ILOCK_SHARED);
@@ -1116,27 +1063,29 @@ xfs_fs_write_inode(
1116 * We make this non-blocking if the inode is contended, return 1063 * We make this non-blocking if the inode is contended, return
1117 * EAGAIN to indicate to the caller that they did not succeed. 1064 * EAGAIN to indicate to the caller that they did not succeed.
1118 * This prevents the flush path from blocking on inodes inside 1065 * This prevents the flush path from blocking on inodes inside
1119 * another operation right now, they get caught later by xfs_sync. 1066 * another operation right now, they get caught later by
1067 * xfs_sync.
1120 */ 1068 */
1121 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) 1069 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
1122 goto out; 1070 goto out;
1123 }
1124 1071
1125 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) 1072 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
1126 goto out_unlock; 1073 goto out_unlock;
1127 1074
1128 /* 1075 /*
1129 * Now we have the flush lock and the inode is not pinned, we can check 1076 * Now we have the flush lock and the inode is not pinned, we
1130 * if the inode is really clean as we know that there are no pending 1077 * can check if the inode is really clean as we know that
1131 * transaction completions, it is not waiting on the delayed write 1078 * there are no pending transaction completions, it is not
1132 * queue and there is no IO in progress. 1079 * waiting on the delayed write queue and there is no IO in
1133 */ 1080 * progress.
1134 if (xfs_inode_clean(ip)) { 1081 */
1135 xfs_ifunlock(ip); 1082 if (xfs_inode_clean(ip)) {
1136 error = 0; 1083 xfs_ifunlock(ip);
1137 goto out_unlock; 1084 error = 0;
1085 goto out_unlock;
1086 }
1087 error = xfs_iflush(ip, 0);
1138 } 1088 }
1139 error = xfs_iflush(ip, 0);
1140 1089
1141 out_unlock: 1090 out_unlock:
1142 xfs_iunlock(ip, XFS_ILOCK_SHARED); 1091 xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -1151,12 +1100,15 @@ xfs_fs_write_inode(
1151} 1100}
1152 1101
1153STATIC void 1102STATIC void
1154xfs_fs_clear_inode( 1103xfs_fs_evict_inode(
1155 struct inode *inode) 1104 struct inode *inode)
1156{ 1105{
1157 xfs_inode_t *ip = XFS_I(inode); 1106 xfs_inode_t *ip = XFS_I(inode);
1158 1107
1159 xfs_itrace_entry(ip); 1108 trace_xfs_evict_inode(ip);
1109
1110 truncate_inode_pages(&inode->i_data, 0);
1111 end_writeback(inode);
1160 XFS_STATS_INC(vn_rele); 1112 XFS_STATS_INC(vn_rele);
1161 XFS_STATS_INC(vn_remove); 1113 XFS_STATS_INC(vn_remove);
1162 XFS_STATS_DEC(vn_active); 1114 XFS_STATS_DEC(vn_active);
@@ -1193,22 +1145,13 @@ xfs_fs_put_super(
1193{ 1145{
1194 struct xfs_mount *mp = XFS_M(sb); 1146 struct xfs_mount *mp = XFS_M(sb);
1195 1147
1148 /*
1149 * Unregister the memory shrinker before we tear down the mount
1150 * structure so we don't have memory reclaim racing with us here.
1151 */
1152 xfs_inode_shrinker_unregister(mp);
1196 xfs_syncd_stop(mp); 1153 xfs_syncd_stop(mp);
1197 1154
1198 if (!(sb->s_flags & MS_RDONLY)) {
1199 /*
1200 * XXX(hch): this should be SYNC_WAIT.
1201 *
1202 * Or more likely not needed at all because the VFS is already
1203 * calling ->sync_fs after shutting down all filestem
1204 * operations and just before calling ->put_super.
1205 */
1206 xfs_sync_data(mp, 0);
1207 xfs_sync_attr(mp, 0);
1208 }
1209
1210 XFS_SEND_PREUNMOUNT(mp);
1211
1212 /* 1155 /*
1213 * Blow away any referenced inode in the filestreams cache. 1156 * Blow away any referenced inode in the filestreams cache.
1214 * This can and will cause log traffic as inodes go inactive 1157 * This can and will cause log traffic as inodes go inactive
@@ -1218,14 +1161,10 @@ xfs_fs_put_super(
1218 1161
1219 XFS_bflush(mp->m_ddev_targp); 1162 XFS_bflush(mp->m_ddev_targp);
1220 1163
1221 XFS_SEND_UNMOUNT(mp);
1222
1223 xfs_unmountfs(mp); 1164 xfs_unmountfs(mp);
1224 xfs_freesb(mp); 1165 xfs_freesb(mp);
1225 xfs_inode_shrinker_unregister(mp);
1226 xfs_icsb_destroy_counters(mp); 1166 xfs_icsb_destroy_counters(mp);
1227 xfs_close_devices(mp); 1167 xfs_close_devices(mp);
1228 xfs_dmops_put(mp);
1229 xfs_free_fsname(mp); 1168 xfs_free_fsname(mp);
1230 kfree(mp); 1169 kfree(mp);
1231} 1170}
@@ -1543,7 +1482,6 @@ xfs_fs_fill_super(
1543 struct inode *root; 1482 struct inode *root;
1544 struct xfs_mount *mp = NULL; 1483 struct xfs_mount *mp = NULL;
1545 int flags = 0, error = ENOMEM; 1484 int flags = 0, error = ENOMEM;
1546 char *mtpt = NULL;
1547 1485
1548 mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); 1486 mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
1549 if (!mp) 1487 if (!mp)
@@ -1559,7 +1497,7 @@ xfs_fs_fill_super(
1559 mp->m_super = sb; 1497 mp->m_super = sb;
1560 sb->s_fs_info = mp; 1498 sb->s_fs_info = mp;
1561 1499
1562 error = xfs_parseargs(mp, (char *)data, &mtpt); 1500 error = xfs_parseargs(mp, (char *)data);
1563 if (error) 1501 if (error)
1564 goto out_free_fsname; 1502 goto out_free_fsname;
1565 1503
@@ -1571,16 +1509,12 @@ xfs_fs_fill_super(
1571#endif 1509#endif
1572 sb->s_op = &xfs_super_operations; 1510 sb->s_op = &xfs_super_operations;
1573 1511
1574 error = xfs_dmops_get(mp);
1575 if (error)
1576 goto out_free_fsname;
1577
1578 if (silent) 1512 if (silent)
1579 flags |= XFS_MFSI_QUIET; 1513 flags |= XFS_MFSI_QUIET;
1580 1514
1581 error = xfs_open_devices(mp); 1515 error = xfs_open_devices(mp);
1582 if (error) 1516 if (error)
1583 goto out_put_dmops; 1517 goto out_free_fsname;
1584 1518
1585 if (xfs_icsb_init_counters(mp)) 1519 if (xfs_icsb_init_counters(mp))
1586 mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB; 1520 mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
@@ -1608,8 +1542,6 @@ xfs_fs_fill_super(
1608 if (error) 1542 if (error)
1609 goto out_filestream_unmount; 1543 goto out_filestream_unmount;
1610 1544
1611 XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, mtpt, mp->m_fsname);
1612
1613 sb->s_magic = XFS_SB_MAGIC; 1545 sb->s_magic = XFS_SB_MAGIC;
1614 sb->s_blocksize = mp->m_sb.sb_blocksize; 1546 sb->s_blocksize = mp->m_sb.sb_blocksize;
1615 sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; 1547 sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
@@ -1638,7 +1570,6 @@ xfs_fs_fill_super(
1638 1570
1639 xfs_inode_shrinker_register(mp); 1571 xfs_inode_shrinker_register(mp);
1640 1572
1641 kfree(mtpt);
1642 return 0; 1573 return 0;
1643 1574
1644 out_filestream_unmount: 1575 out_filestream_unmount:
@@ -1648,11 +1579,8 @@ xfs_fs_fill_super(
1648 out_destroy_counters: 1579 out_destroy_counters:
1649 xfs_icsb_destroy_counters(mp); 1580 xfs_icsb_destroy_counters(mp);
1650 xfs_close_devices(mp); 1581 xfs_close_devices(mp);
1651 out_put_dmops:
1652 xfs_dmops_put(mp);
1653 out_free_fsname: 1582 out_free_fsname:
1654 xfs_free_fsname(mp); 1583 xfs_free_fsname(mp);
1655 kfree(mtpt);
1656 kfree(mp); 1584 kfree(mp);
1657 out: 1585 out:
1658 return -error; 1586 return -error;
@@ -1696,7 +1624,7 @@ static const struct super_operations xfs_super_operations = {
1696 .destroy_inode = xfs_fs_destroy_inode, 1624 .destroy_inode = xfs_fs_destroy_inode,
1697 .dirty_inode = xfs_fs_dirty_inode, 1625 .dirty_inode = xfs_fs_dirty_inode,
1698 .write_inode = xfs_fs_write_inode, 1626 .write_inode = xfs_fs_write_inode,
1699 .clear_inode = xfs_fs_clear_inode, 1627 .evict_inode = xfs_fs_evict_inode,
1700 .put_super = xfs_fs_put_super, 1628 .put_super = xfs_fs_put_super,
1701 .sync_fs = xfs_fs_sync_fs, 1629 .sync_fs = xfs_fs_sync_fs,
1702 .freeze_fs = xfs_fs_freeze, 1630 .freeze_fs = xfs_fs_freeze,
@@ -1759,6 +1687,12 @@ xfs_init_zones(void)
1759 if (!xfs_trans_zone) 1687 if (!xfs_trans_zone)
1760 goto out_destroy_ifork_zone; 1688 goto out_destroy_ifork_zone;
1761 1689
1690 xfs_log_item_desc_zone =
1691 kmem_zone_init(sizeof(struct xfs_log_item_desc),
1692 "xfs_log_item_desc");
1693 if (!xfs_log_item_desc_zone)
1694 goto out_destroy_trans_zone;
1695
1762 /* 1696 /*
1763 * The size of the zone allocated buf log item is the maximum 1697 * The size of the zone allocated buf log item is the maximum
1764 * size possible under XFS. This wastes a little bit of memory, 1698 * size possible under XFS. This wastes a little bit of memory,
@@ -1768,7 +1702,7 @@ xfs_init_zones(void)
1768 (((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / 1702 (((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) /
1769 NBWORD) * sizeof(int))), "xfs_buf_item"); 1703 NBWORD) * sizeof(int))), "xfs_buf_item");
1770 if (!xfs_buf_item_zone) 1704 if (!xfs_buf_item_zone)
1771 goto out_destroy_trans_zone; 1705 goto out_destroy_log_item_desc_zone;
1772 1706
1773 xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) + 1707 xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
1774 ((XFS_EFD_MAX_FAST_EXTENTS - 1) * 1708 ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
@@ -1805,6 +1739,8 @@ xfs_init_zones(void)
1805 kmem_zone_destroy(xfs_efd_zone); 1739 kmem_zone_destroy(xfs_efd_zone);
1806 out_destroy_buf_item_zone: 1740 out_destroy_buf_item_zone:
1807 kmem_zone_destroy(xfs_buf_item_zone); 1741 kmem_zone_destroy(xfs_buf_item_zone);
1742 out_destroy_log_item_desc_zone:
1743 kmem_zone_destroy(xfs_log_item_desc_zone);
1808 out_destroy_trans_zone: 1744 out_destroy_trans_zone:
1809 kmem_zone_destroy(xfs_trans_zone); 1745 kmem_zone_destroy(xfs_trans_zone);
1810 out_destroy_ifork_zone: 1746 out_destroy_ifork_zone:
@@ -1835,6 +1771,7 @@ xfs_destroy_zones(void)
1835 kmem_zone_destroy(xfs_efi_zone); 1771 kmem_zone_destroy(xfs_efi_zone);
1836 kmem_zone_destroy(xfs_efd_zone); 1772 kmem_zone_destroy(xfs_efd_zone);
1837 kmem_zone_destroy(xfs_buf_item_zone); 1773 kmem_zone_destroy(xfs_buf_item_zone);
1774 kmem_zone_destroy(xfs_log_item_desc_zone);
1838 kmem_zone_destroy(xfs_trans_zone); 1775 kmem_zone_destroy(xfs_trans_zone);
1839 kmem_zone_destroy(xfs_ifork_zone); 1776 kmem_zone_destroy(xfs_ifork_zone);
1840 kmem_zone_destroy(xfs_dabuf_zone); 1777 kmem_zone_destroy(xfs_dabuf_zone);
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 519618e9279e..1ef4a4d2d997 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -56,12 +56,6 @@ extern void xfs_qm_exit(void);
56# define XFS_BIGFS_STRING 56# define XFS_BIGFS_STRING
57#endif 57#endif
58 58
59#ifdef CONFIG_XFS_DMAPI
60# define XFS_DMAPI_STRING "dmapi support, "
61#else
62# define XFS_DMAPI_STRING
63#endif
64
65#ifdef DEBUG 59#ifdef DEBUG
66# define XFS_DBG_STRING "debug" 60# define XFS_DBG_STRING "debug"
67#else 61#else
@@ -72,7 +66,6 @@ extern void xfs_qm_exit(void);
72 XFS_SECURITY_STRING \ 66 XFS_SECURITY_STRING \
73 XFS_REALTIME_STRING \ 67 XFS_REALTIME_STRING \
74 XFS_BIGFS_STRING \ 68 XFS_BIGFS_STRING \
75 XFS_DMAPI_STRING \
76 XFS_DBG_STRING /* DBG must be last */ 69 XFS_DBG_STRING /* DBG must be last */
77 70
78struct xfs_inode; 71struct xfs_inode;
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index a51a07c3a70c..dfcbd98d1599 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -24,25 +24,14 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h"
33#include "xfs_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_inode.h" 29#include "xfs_inode.h"
37#include "xfs_dinode.h" 30#include "xfs_dinode.h"
38#include "xfs_error.h" 31#include "xfs_error.h"
39#include "xfs_mru_cache.h"
40#include "xfs_filestream.h" 32#include "xfs_filestream.h"
41#include "xfs_vnodeops.h" 33#include "xfs_vnodeops.h"
42#include "xfs_utils.h"
43#include "xfs_buf_item.h"
44#include "xfs_inode_item.h" 34#include "xfs_inode_item.h"
45#include "xfs_rw.h"
46#include "xfs_quota.h" 35#include "xfs_quota.h"
47#include "xfs_trace.h" 36#include "xfs_trace.h"
48 37
@@ -319,7 +308,7 @@ xfs_sync_inode_attr(
319/* 308/*
320 * Write out pagecache data for the whole filesystem. 309 * Write out pagecache data for the whole filesystem.
321 */ 310 */
322int 311STATIC int
323xfs_sync_data( 312xfs_sync_data(
324 struct xfs_mount *mp, 313 struct xfs_mount *mp,
325 int flags) 314 int flags)
@@ -340,7 +329,7 @@ xfs_sync_data(
340/* 329/*
341 * Write out inode metadata (attributes) for the whole filesystem. 330 * Write out inode metadata (attributes) for the whole filesystem.
342 */ 331 */
343int 332STATIC int
344xfs_sync_attr( 333xfs_sync_attr(
345 struct xfs_mount *mp, 334 struct xfs_mount *mp,
346 int flags) 335 int flags)
@@ -373,8 +362,7 @@ xfs_commit_dummy_trans(
373 362
374 xfs_ilock(ip, XFS_ILOCK_EXCL); 363 xfs_ilock(ip, XFS_ILOCK_EXCL);
375 364
376 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 365 xfs_trans_ijoin(tp, ip);
377 xfs_trans_ihold(tp, ip);
378 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 366 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
379 error = xfs_trans_commit(tp, 0); 367 error = xfs_trans_commit(tp, 0);
380 xfs_iunlock(ip, XFS_ILOCK_EXCL); 368 xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -867,7 +855,36 @@ out:
867reclaim: 855reclaim:
868 xfs_ifunlock(ip); 856 xfs_ifunlock(ip);
869 xfs_iunlock(ip, XFS_ILOCK_EXCL); 857 xfs_iunlock(ip, XFS_ILOCK_EXCL);
870 xfs_ireclaim(ip); 858
859 XFS_STATS_INC(xs_ig_reclaims);
860 /*
861 * Remove the inode from the per-AG radix tree.
862 *
863 * Because radix_tree_delete won't complain even if the item was never
864 * added to the tree assert that it's been there before to catch
865 * problems with the inode life time early on.
866 */
867 write_lock(&pag->pag_ici_lock);
868 if (!radix_tree_delete(&pag->pag_ici_root,
869 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
870 ASSERT(0);
871 write_unlock(&pag->pag_ici_lock);
872
873 /*
874 * Here we do an (almost) spurious inode lock in order to coordinate
875 * with inode cache radix tree lookups. This is because the lookup
876 * can reference the inodes in the cache without taking references.
877 *
878 * We make that OK here by ensuring that we wait until the inode is
879 * unlocked after the lookup before we go ahead and free it. We get
880 * both the ilock and the iolock because the code may need to drop the
881 * ilock one but will still hold the iolock.
882 */
883 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
884 xfs_qm_dqdetach(ip);
885 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
886
887 xfs_inode_free(ip);
871 return error; 888 return error;
872 889
873} 890}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index e28139aaa4aa..fe78726196f8 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -35,9 +35,6 @@ typedef struct xfs_sync_work {
35int xfs_syncd_init(struct xfs_mount *mp); 35int xfs_syncd_init(struct xfs_mount *mp);
36void xfs_syncd_stop(struct xfs_mount *mp); 36void xfs_syncd_stop(struct xfs_mount *mp);
37 37
38int xfs_sync_attr(struct xfs_mount *mp, int flags);
39int xfs_sync_data(struct xfs_mount *mp, int flags);
40
41int xfs_quiesce_data(struct xfs_mount *mp); 38int xfs_quiesce_data(struct xfs_mount *mp);
42void xfs_quiesce_attr(struct xfs_mount *mp); 39void xfs_quiesce_attr(struct xfs_mount *mp);
43 40
diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c
index d12be8470cba..88d25d4aa56e 100644
--- a/fs/xfs/linux-2.6/xfs_trace.c
+++ b/fs/xfs/linux-2.6/xfs_trace.c
@@ -24,17 +24,13 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_da_btree.h" 27#include "xfs_da_btree.h"
29#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
30#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
31#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
32#include "xfs_dir2_sf.h"
33#include "xfs_attr_sf.h"
34#include "xfs_dinode.h" 31#include "xfs_dinode.h"
35#include "xfs_inode.h" 32#include "xfs_inode.h"
36#include "xfs_btree.h" 33#include "xfs_btree.h"
37#include "xfs_dmapi.h"
38#include "xfs_mount.h" 34#include "xfs_mount.h"
39#include "xfs_ialloc.h" 35#include "xfs_ialloc.h"
40#include "xfs_itable.h" 36#include "xfs_itable.h"
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index 302820690904..be5dffd282a1 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -317,8 +317,6 @@ DEFINE_BUF_EVENT(xfs_buf_init);
317DEFINE_BUF_EVENT(xfs_buf_free); 317DEFINE_BUF_EVENT(xfs_buf_free);
318DEFINE_BUF_EVENT(xfs_buf_hold); 318DEFINE_BUF_EVENT(xfs_buf_hold);
319DEFINE_BUF_EVENT(xfs_buf_rele); 319DEFINE_BUF_EVENT(xfs_buf_rele);
320DEFINE_BUF_EVENT(xfs_buf_pin);
321DEFINE_BUF_EVENT(xfs_buf_unpin);
322DEFINE_BUF_EVENT(xfs_buf_iodone); 320DEFINE_BUF_EVENT(xfs_buf_iodone);
323DEFINE_BUF_EVENT(xfs_buf_iorequest); 321DEFINE_BUF_EVENT(xfs_buf_iorequest);
324DEFINE_BUF_EVENT(xfs_buf_bawrite); 322DEFINE_BUF_EVENT(xfs_buf_bawrite);
@@ -541,7 +539,7 @@ DEFINE_LOCK_EVENT(xfs_ilock_nowait);
541DEFINE_LOCK_EVENT(xfs_ilock_demote); 539DEFINE_LOCK_EVENT(xfs_ilock_demote);
542DEFINE_LOCK_EVENT(xfs_iunlock); 540DEFINE_LOCK_EVENT(xfs_iunlock);
543 541
544DECLARE_EVENT_CLASS(xfs_iget_class, 542DECLARE_EVENT_CLASS(xfs_inode_class,
545 TP_PROTO(struct xfs_inode *ip), 543 TP_PROTO(struct xfs_inode *ip),
546 TP_ARGS(ip), 544 TP_ARGS(ip),
547 TP_STRUCT__entry( 545 TP_STRUCT__entry(
@@ -557,16 +555,38 @@ DECLARE_EVENT_CLASS(xfs_iget_class,
557 __entry->ino) 555 __entry->ino)
558) 556)
559 557
560#define DEFINE_IGET_EVENT(name) \ 558#define DEFINE_INODE_EVENT(name) \
561DEFINE_EVENT(xfs_iget_class, name, \ 559DEFINE_EVENT(xfs_inode_class, name, \
562 TP_PROTO(struct xfs_inode *ip), \ 560 TP_PROTO(struct xfs_inode *ip), \
563 TP_ARGS(ip)) 561 TP_ARGS(ip))
564DEFINE_IGET_EVENT(xfs_iget_skip); 562DEFINE_INODE_EVENT(xfs_iget_skip);
565DEFINE_IGET_EVENT(xfs_iget_reclaim); 563DEFINE_INODE_EVENT(xfs_iget_reclaim);
566DEFINE_IGET_EVENT(xfs_iget_found); 564DEFINE_INODE_EVENT(xfs_iget_reclaim_fail);
567DEFINE_IGET_EVENT(xfs_iget_alloc); 565DEFINE_INODE_EVENT(xfs_iget_hit);
568 566DEFINE_INODE_EVENT(xfs_iget_miss);
569DECLARE_EVENT_CLASS(xfs_inode_class, 567
568DEFINE_INODE_EVENT(xfs_getattr);
569DEFINE_INODE_EVENT(xfs_setattr);
570DEFINE_INODE_EVENT(xfs_readlink);
571DEFINE_INODE_EVENT(xfs_alloc_file_space);
572DEFINE_INODE_EVENT(xfs_free_file_space);
573DEFINE_INODE_EVENT(xfs_readdir);
574#ifdef CONFIG_XFS_POSIX_ACL
575DEFINE_INODE_EVENT(xfs_check_acl);
576#endif
577DEFINE_INODE_EVENT(xfs_vm_bmap);
578DEFINE_INODE_EVENT(xfs_file_ioctl);
579DEFINE_INODE_EVENT(xfs_file_compat_ioctl);
580DEFINE_INODE_EVENT(xfs_ioctl_setattr);
581DEFINE_INODE_EVENT(xfs_file_fsync);
582DEFINE_INODE_EVENT(xfs_destroy_inode);
583DEFINE_INODE_EVENT(xfs_write_inode);
584DEFINE_INODE_EVENT(xfs_evict_inode);
585
586DEFINE_INODE_EVENT(xfs_dquot_dqalloc);
587DEFINE_INODE_EVENT(xfs_dquot_dqdetach);
588
589DECLARE_EVENT_CLASS(xfs_iref_class,
570 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), 590 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
571 TP_ARGS(ip, caller_ip), 591 TP_ARGS(ip, caller_ip),
572 TP_STRUCT__entry( 592 TP_STRUCT__entry(
@@ -591,20 +611,71 @@ DECLARE_EVENT_CLASS(xfs_inode_class,
591 (char *)__entry->caller_ip) 611 (char *)__entry->caller_ip)
592) 612)
593 613
594#define DEFINE_INODE_EVENT(name) \ 614#define DEFINE_IREF_EVENT(name) \
595DEFINE_EVENT(xfs_inode_class, name, \ 615DEFINE_EVENT(xfs_iref_class, name, \
596 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \ 616 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
597 TP_ARGS(ip, caller_ip)) 617 TP_ARGS(ip, caller_ip))
598DEFINE_INODE_EVENT(xfs_ihold); 618DEFINE_IREF_EVENT(xfs_ihold);
599DEFINE_INODE_EVENT(xfs_irele); 619DEFINE_IREF_EVENT(xfs_irele);
600DEFINE_INODE_EVENT(xfs_inode_pin); 620DEFINE_IREF_EVENT(xfs_inode_pin);
601DEFINE_INODE_EVENT(xfs_inode_unpin); 621DEFINE_IREF_EVENT(xfs_inode_unpin);
602DEFINE_INODE_EVENT(xfs_inode_unpin_nowait); 622DEFINE_IREF_EVENT(xfs_inode_unpin_nowait);
623
624DECLARE_EVENT_CLASS(xfs_namespace_class,
625 TP_PROTO(struct xfs_inode *dp, struct xfs_name *name),
626 TP_ARGS(dp, name),
627 TP_STRUCT__entry(
628 __field(dev_t, dev)
629 __field(xfs_ino_t, dp_ino)
630 __dynamic_array(char, name, name->len)
631 ),
632 TP_fast_assign(
633 __entry->dev = VFS_I(dp)->i_sb->s_dev;
634 __entry->dp_ino = dp->i_ino;
635 memcpy(__get_str(name), name->name, name->len);
636 ),
637 TP_printk("dev %d:%d dp ino 0x%llx name %s",
638 MAJOR(__entry->dev), MINOR(__entry->dev),
639 __entry->dp_ino,
640 __get_str(name))
641)
603 642
604/* the old xfs_itrace_entry tracer - to be replaced by s.th. in the VFS */ 643#define DEFINE_NAMESPACE_EVENT(name) \
605DEFINE_INODE_EVENT(xfs_inode); 644DEFINE_EVENT(xfs_namespace_class, name, \
606#define xfs_itrace_entry(ip) \ 645 TP_PROTO(struct xfs_inode *dp, struct xfs_name *name), \
607 trace_xfs_inode(ip, _THIS_IP_) 646 TP_ARGS(dp, name))
647DEFINE_NAMESPACE_EVENT(xfs_remove);
648DEFINE_NAMESPACE_EVENT(xfs_link);
649DEFINE_NAMESPACE_EVENT(xfs_lookup);
650DEFINE_NAMESPACE_EVENT(xfs_create);
651DEFINE_NAMESPACE_EVENT(xfs_symlink);
652
653TRACE_EVENT(xfs_rename,
654 TP_PROTO(struct xfs_inode *src_dp, struct xfs_inode *target_dp,
655 struct xfs_name *src_name, struct xfs_name *target_name),
656 TP_ARGS(src_dp, target_dp, src_name, target_name),
657 TP_STRUCT__entry(
658 __field(dev_t, dev)
659 __field(xfs_ino_t, src_dp_ino)
660 __field(xfs_ino_t, target_dp_ino)
661 __dynamic_array(char, src_name, src_name->len)
662 __dynamic_array(char, target_name, target_name->len)
663 ),
664 TP_fast_assign(
665 __entry->dev = VFS_I(src_dp)->i_sb->s_dev;
666 __entry->src_dp_ino = src_dp->i_ino;
667 __entry->target_dp_ino = target_dp->i_ino;
668 memcpy(__get_str(src_name), src_name->name, src_name->len);
669 memcpy(__get_str(target_name), target_name->name, target_name->len);
670 ),
671 TP_printk("dev %d:%d src dp ino 0x%llx target dp ino 0x%llx"
672 " src name %s target name %s",
673 MAJOR(__entry->dev), MINOR(__entry->dev),
674 __entry->src_dp_ino,
675 __entry->target_dp_ino,
676 __get_str(src_name),
677 __get_str(target_name))
678)
608 679
609DECLARE_EVENT_CLASS(xfs_dquot_class, 680DECLARE_EVENT_CLASS(xfs_dquot_class,
610 TP_PROTO(struct xfs_dquot *dqp), 681 TP_PROTO(struct xfs_dquot *dqp),
@@ -684,9 +755,6 @@ DEFINE_DQUOT_EVENT(xfs_dqrele);
684DEFINE_DQUOT_EVENT(xfs_dqflush); 755DEFINE_DQUOT_EVENT(xfs_dqflush);
685DEFINE_DQUOT_EVENT(xfs_dqflush_force); 756DEFINE_DQUOT_EVENT(xfs_dqflush_force);
686DEFINE_DQUOT_EVENT(xfs_dqflush_done); 757DEFINE_DQUOT_EVENT(xfs_dqflush_done);
687/* not really iget events, but we re-use the format */
688DEFINE_IGET_EVENT(xfs_dquot_dqalloc);
689DEFINE_IGET_EVENT(xfs_dquot_dqdetach);
690 758
691DECLARE_EVENT_CLASS(xfs_loggrant_class, 759DECLARE_EVENT_CLASS(xfs_loggrant_class,
692 TP_PROTO(struct log *log, struct xlog_ticket *tic), 760 TP_PROTO(struct log *log, struct xlog_ticket *tic),
@@ -834,33 +902,29 @@ DECLARE_EVENT_CLASS(xfs_page_class,
834 __field(loff_t, size) 902 __field(loff_t, size)
835 __field(unsigned long, offset) 903 __field(unsigned long, offset)
836 __field(int, delalloc) 904 __field(int, delalloc)
837 __field(int, unmapped)
838 __field(int, unwritten) 905 __field(int, unwritten)
839 ), 906 ),
840 TP_fast_assign( 907 TP_fast_assign(
841 int delalloc = -1, unmapped = -1, unwritten = -1; 908 int delalloc = -1, unwritten = -1;
842 909
843 if (page_has_buffers(page)) 910 if (page_has_buffers(page))
844 xfs_count_page_state(page, &delalloc, 911 xfs_count_page_state(page, &delalloc, &unwritten);
845 &unmapped, &unwritten);
846 __entry->dev = inode->i_sb->s_dev; 912 __entry->dev = inode->i_sb->s_dev;
847 __entry->ino = XFS_I(inode)->i_ino; 913 __entry->ino = XFS_I(inode)->i_ino;
848 __entry->pgoff = page_offset(page); 914 __entry->pgoff = page_offset(page);
849 __entry->size = i_size_read(inode); 915 __entry->size = i_size_read(inode);
850 __entry->offset = off; 916 __entry->offset = off;
851 __entry->delalloc = delalloc; 917 __entry->delalloc = delalloc;
852 __entry->unmapped = unmapped;
853 __entry->unwritten = unwritten; 918 __entry->unwritten = unwritten;
854 ), 919 ),
855 TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " 920 TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx "
856 "delalloc %d unmapped %d unwritten %d", 921 "delalloc %d unwritten %d",
857 MAJOR(__entry->dev), MINOR(__entry->dev), 922 MAJOR(__entry->dev), MINOR(__entry->dev),
858 __entry->ino, 923 __entry->ino,
859 __entry->pgoff, 924 __entry->pgoff,
860 __entry->size, 925 __entry->size,
861 __entry->offset, 926 __entry->offset,
862 __entry->delalloc, 927 __entry->delalloc,
863 __entry->unmapped,
864 __entry->unwritten) 928 __entry->unwritten)
865) 929)
866 930
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 585e7633dfc7..e1a2f6800e01 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -23,25 +23,15 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_alloc.h" 26#include "xfs_alloc.h"
28#include "xfs_dmapi.h"
29#include "xfs_quota.h" 27#include "xfs_quota.h"
30#include "xfs_mount.h" 28#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h"
37#include "xfs_inode.h" 30#include "xfs_inode.h"
38#include "xfs_btree.h"
39#include "xfs_ialloc.h"
40#include "xfs_bmap.h" 31#include "xfs_bmap.h"
41#include "xfs_rtalloc.h" 32#include "xfs_rtalloc.h"
42#include "xfs_error.h" 33#include "xfs_error.h"
43#include "xfs_itable.h" 34#include "xfs_itable.h"
44#include "xfs_rw.h"
45#include "xfs_attr.h" 35#include "xfs_attr.h"
46#include "xfs_buf_item.h" 36#include "xfs_buf_item.h"
47#include "xfs_trans_space.h" 37#include "xfs_trans_space.h"
@@ -64,8 +54,6 @@
64 flush lock - ditto. 54 flush lock - ditto.
65*/ 55*/
66 56
67STATIC void xfs_qm_dqflush_done(xfs_buf_t *, xfs_dq_logitem_t *);
68
69#ifdef DEBUG 57#ifdef DEBUG
70xfs_buftarg_t *xfs_dqerror_target; 58xfs_buftarg_t *xfs_dqerror_target;
71int xfs_do_dqerror; 59int xfs_do_dqerror;
@@ -390,21 +378,14 @@ xfs_qm_dqalloc(
390 return (ESRCH); 378 return (ESRCH);
391 } 379 }
392 380
393 /* 381 xfs_trans_ijoin_ref(tp, quotip, XFS_ILOCK_EXCL);
394 * xfs_trans_commit normally decrements the vnode ref count
395 * when it unlocks the inode. Since we want to keep the quota
396 * inode around, we bump the vnode ref count now.
397 */
398 IHOLD(quotip);
399
400 xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
401 nmaps = 1; 382 nmaps = 1;
402 if ((error = xfs_bmapi(tp, quotip, 383 if ((error = xfs_bmapi(tp, quotip,
403 offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB, 384 offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB,
404 XFS_BMAPI_METADATA | XFS_BMAPI_WRITE, 385 XFS_BMAPI_METADATA | XFS_BMAPI_WRITE,
405 &firstblock, 386 &firstblock,
406 XFS_QM_DQALLOC_SPACE_RES(mp), 387 XFS_QM_DQALLOC_SPACE_RES(mp),
407 &map, &nmaps, &flist, NULL))) { 388 &map, &nmaps, &flist))) {
408 goto error0; 389 goto error0;
409 } 390 }
410 ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); 391 ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
@@ -520,7 +501,7 @@ xfs_qm_dqtobp(
520 error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset, 501 error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset,
521 XFS_DQUOT_CLUSTER_SIZE_FSB, 502 XFS_DQUOT_CLUSTER_SIZE_FSB,
522 XFS_BMAPI_METADATA, 503 XFS_BMAPI_METADATA,
523 NULL, 0, &map, &nmaps, NULL, NULL); 504 NULL, 0, &map, &nmaps, NULL);
524 505
525 xfs_iunlock(quotip, XFS_ILOCK_SHARED); 506 xfs_iunlock(quotip, XFS_ILOCK_SHARED);
526 if (error) 507 if (error)
@@ -1141,6 +1122,46 @@ xfs_qm_dqrele(
1141 xfs_qm_dqput(dqp); 1122 xfs_qm_dqput(dqp);
1142} 1123}
1143 1124
1125/*
1126 * This is the dquot flushing I/O completion routine. It is called
1127 * from interrupt level when the buffer containing the dquot is
1128 * flushed to disk. It is responsible for removing the dquot logitem
1129 * from the AIL if it has not been re-logged, and unlocking the dquot's
1130 * flush lock. This behavior is very similar to that of inodes..
1131 */
1132STATIC void
1133xfs_qm_dqflush_done(
1134 struct xfs_buf *bp,
1135 struct xfs_log_item *lip)
1136{
1137 xfs_dq_logitem_t *qip = (struct xfs_dq_logitem *)lip;
1138 xfs_dquot_t *dqp = qip->qli_dquot;
1139 struct xfs_ail *ailp = lip->li_ailp;
1140
1141 /*
1142 * We only want to pull the item from the AIL if its
1143 * location in the log has not changed since we started the flush.
1144 * Thus, we only bother if the dquot's lsn has
1145 * not changed. First we check the lsn outside the lock
1146 * since it's cheaper, and then we recheck while
1147 * holding the lock before removing the dquot from the AIL.
1148 */
1149 if ((lip->li_flags & XFS_LI_IN_AIL) &&
1150 lip->li_lsn == qip->qli_flush_lsn) {
1151
1152 /* xfs_trans_ail_delete() drops the AIL lock. */
1153 spin_lock(&ailp->xa_lock);
1154 if (lip->li_lsn == qip->qli_flush_lsn)
1155 xfs_trans_ail_delete(ailp, lip);
1156 else
1157 spin_unlock(&ailp->xa_lock);
1158 }
1159
1160 /*
1161 * Release the dq's flush lock since we're done with it.
1162 */
1163 xfs_dqfunlock(dqp);
1164}
1144 1165
1145/* 1166/*
1146 * Write a modified dquot to disk. 1167 * Write a modified dquot to disk.
@@ -1222,8 +1243,9 @@ xfs_qm_dqflush(
1222 * Attach an iodone routine so that we can remove this dquot from the 1243 * Attach an iodone routine so that we can remove this dquot from the
1223 * AIL and release the flush lock once the dquot is synced to disk. 1244 * AIL and release the flush lock once the dquot is synced to disk.
1224 */ 1245 */
1225 xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t *, xfs_log_item_t *)) 1246 xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done,
1226 xfs_qm_dqflush_done, &(dqp->q_logitem.qli_item)); 1247 &dqp->q_logitem.qli_item);
1248
1227 /* 1249 /*
1228 * If the buffer is pinned then push on the log so we won't 1250 * If the buffer is pinned then push on the log so we won't
1229 * get stuck waiting in the write for too long. 1251 * get stuck waiting in the write for too long.
@@ -1247,50 +1269,6 @@ xfs_qm_dqflush(
1247 1269
1248} 1270}
1249 1271
1250/*
1251 * This is the dquot flushing I/O completion routine. It is called
1252 * from interrupt level when the buffer containing the dquot is
1253 * flushed to disk. It is responsible for removing the dquot logitem
1254 * from the AIL if it has not been re-logged, and unlocking the dquot's
1255 * flush lock. This behavior is very similar to that of inodes..
1256 */
1257/*ARGSUSED*/
1258STATIC void
1259xfs_qm_dqflush_done(
1260 xfs_buf_t *bp,
1261 xfs_dq_logitem_t *qip)
1262{
1263 xfs_dquot_t *dqp;
1264 struct xfs_ail *ailp;
1265
1266 dqp = qip->qli_dquot;
1267 ailp = qip->qli_item.li_ailp;
1268
1269 /*
1270 * We only want to pull the item from the AIL if its
1271 * location in the log has not changed since we started the flush.
1272 * Thus, we only bother if the dquot's lsn has
1273 * not changed. First we check the lsn outside the lock
1274 * since it's cheaper, and then we recheck while
1275 * holding the lock before removing the dquot from the AIL.
1276 */
1277 if ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
1278 qip->qli_item.li_lsn == qip->qli_flush_lsn) {
1279
1280 /* xfs_trans_ail_delete() drops the AIL lock. */
1281 spin_lock(&ailp->xa_lock);
1282 if (qip->qli_item.li_lsn == qip->qli_flush_lsn)
1283 xfs_trans_ail_delete(ailp, (xfs_log_item_t*)qip);
1284 else
1285 spin_unlock(&ailp->xa_lock);
1286 }
1287
1288 /*
1289 * Release the dq's flush lock since we're done with it.
1290 */
1291 xfs_dqfunlock(dqp);
1292}
1293
1294int 1272int
1295xfs_qm_dqlock_nowait( 1273xfs_qm_dqlock_nowait(
1296 xfs_dquot_t *dqp) 1274 xfs_dquot_t *dqp)
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 8d89a24ae324..2a1f3dc10a02 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -23,42 +23,36 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_alloc.h" 26#include "xfs_alloc.h"
28#include "xfs_dmapi.h"
29#include "xfs_quota.h" 27#include "xfs_quota.h"
30#include "xfs_mount.h" 28#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h"
37#include "xfs_inode.h" 30#include "xfs_inode.h"
38#include "xfs_bmap.h" 31#include "xfs_bmap.h"
39#include "xfs_btree.h"
40#include "xfs_ialloc.h"
41#include "xfs_rtalloc.h" 32#include "xfs_rtalloc.h"
42#include "xfs_error.h" 33#include "xfs_error.h"
43#include "xfs_itable.h" 34#include "xfs_itable.h"
44#include "xfs_rw.h"
45#include "xfs_attr.h" 35#include "xfs_attr.h"
46#include "xfs_buf_item.h" 36#include "xfs_buf_item.h"
47#include "xfs_trans_priv.h" 37#include "xfs_trans_priv.h"
48#include "xfs_qm.h" 38#include "xfs_qm.h"
49 39
40static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip)
41{
42 return container_of(lip, struct xfs_dq_logitem, qli_item);
43}
44
50/* 45/*
51 * returns the number of iovecs needed to log the given dquot item. 46 * returns the number of iovecs needed to log the given dquot item.
52 */ 47 */
53/* ARGSUSED */
54STATIC uint 48STATIC uint
55xfs_qm_dquot_logitem_size( 49xfs_qm_dquot_logitem_size(
56 xfs_dq_logitem_t *logitem) 50 struct xfs_log_item *lip)
57{ 51{
58 /* 52 /*
59 * we need only two iovecs, one for the format, one for the real thing 53 * we need only two iovecs, one for the format, one for the real thing
60 */ 54 */
61 return (2); 55 return 2;
62} 56}
63 57
64/* 58/*
@@ -66,22 +60,21 @@ xfs_qm_dquot_logitem_size(
66 */ 60 */
67STATIC void 61STATIC void
68xfs_qm_dquot_logitem_format( 62xfs_qm_dquot_logitem_format(
69 xfs_dq_logitem_t *logitem, 63 struct xfs_log_item *lip,
70 xfs_log_iovec_t *logvec) 64 struct xfs_log_iovec *logvec)
71{ 65{
72 ASSERT(logitem); 66 struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
73 ASSERT(logitem->qli_dquot);
74 67
75 logvec->i_addr = (xfs_caddr_t)&logitem->qli_format; 68 logvec->i_addr = &qlip->qli_format;
76 logvec->i_len = sizeof(xfs_dq_logformat_t); 69 logvec->i_len = sizeof(xfs_dq_logformat_t);
77 logvec->i_type = XLOG_REG_TYPE_QFORMAT; 70 logvec->i_type = XLOG_REG_TYPE_QFORMAT;
78 logvec++; 71 logvec++;
79 logvec->i_addr = (xfs_caddr_t)&logitem->qli_dquot->q_core; 72 logvec->i_addr = &qlip->qli_dquot->q_core;
80 logvec->i_len = sizeof(xfs_disk_dquot_t); 73 logvec->i_len = sizeof(xfs_disk_dquot_t);
81 logvec->i_type = XLOG_REG_TYPE_DQUOT; 74 logvec->i_type = XLOG_REG_TYPE_DQUOT;
82 75
83 ASSERT(2 == logitem->qli_item.li_desc->lid_size); 76 ASSERT(2 == lip->li_desc->lid_size);
84 logitem->qli_format.qlf_size = 2; 77 qlip->qli_format.qlf_size = 2;
85 78
86} 79}
87 80
@@ -90,9 +83,9 @@ xfs_qm_dquot_logitem_format(
90 */ 83 */
91STATIC void 84STATIC void
92xfs_qm_dquot_logitem_pin( 85xfs_qm_dquot_logitem_pin(
93 xfs_dq_logitem_t *logitem) 86 struct xfs_log_item *lip)
94{ 87{
95 xfs_dquot_t *dqp = logitem->qli_dquot; 88 struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
96 89
97 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 90 ASSERT(XFS_DQ_IS_LOCKED(dqp));
98 atomic_inc(&dqp->q_pincount); 91 atomic_inc(&dqp->q_pincount);
@@ -104,27 +97,18 @@ xfs_qm_dquot_logitem_pin(
104 * dquot must have been previously pinned with a call to 97 * dquot must have been previously pinned with a call to
105 * xfs_qm_dquot_logitem_pin(). 98 * xfs_qm_dquot_logitem_pin().
106 */ 99 */
107/* ARGSUSED */
108STATIC void 100STATIC void
109xfs_qm_dquot_logitem_unpin( 101xfs_qm_dquot_logitem_unpin(
110 xfs_dq_logitem_t *logitem) 102 struct xfs_log_item *lip,
103 int remove)
111{ 104{
112 xfs_dquot_t *dqp = logitem->qli_dquot; 105 struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
113 106
114 ASSERT(atomic_read(&dqp->q_pincount) > 0); 107 ASSERT(atomic_read(&dqp->q_pincount) > 0);
115 if (atomic_dec_and_test(&dqp->q_pincount)) 108 if (atomic_dec_and_test(&dqp->q_pincount))
116 wake_up(&dqp->q_pinwait); 109 wake_up(&dqp->q_pinwait);
117} 110}
118 111
119/* ARGSUSED */
120STATIC void
121xfs_qm_dquot_logitem_unpin_remove(
122 xfs_dq_logitem_t *logitem,
123 xfs_trans_t *tp)
124{
125 xfs_qm_dquot_logitem_unpin(logitem);
126}
127
128/* 112/*
129 * Given the logitem, this writes the corresponding dquot entry to disk 113 * Given the logitem, this writes the corresponding dquot entry to disk
130 * asynchronously. This is called with the dquot entry securely locked; 114 * asynchronously. This is called with the dquot entry securely locked;
@@ -133,12 +117,10 @@ xfs_qm_dquot_logitem_unpin_remove(
133 */ 117 */
134STATIC void 118STATIC void
135xfs_qm_dquot_logitem_push( 119xfs_qm_dquot_logitem_push(
136 xfs_dq_logitem_t *logitem) 120 struct xfs_log_item *lip)
137{ 121{
138 xfs_dquot_t *dqp; 122 struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
139 int error; 123 int error;
140
141 dqp = logitem->qli_dquot;
142 124
143 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 125 ASSERT(XFS_DQ_IS_LOCKED(dqp));
144 ASSERT(!completion_done(&dqp->q_flush)); 126 ASSERT(!completion_done(&dqp->q_flush));
@@ -160,27 +142,25 @@ xfs_qm_dquot_logitem_push(
160 xfs_dqunlock(dqp); 142 xfs_dqunlock(dqp);
161} 143}
162 144
163/*ARGSUSED*/
164STATIC xfs_lsn_t 145STATIC xfs_lsn_t
165xfs_qm_dquot_logitem_committed( 146xfs_qm_dquot_logitem_committed(
166 xfs_dq_logitem_t *l, 147 struct xfs_log_item *lip,
167 xfs_lsn_t lsn) 148 xfs_lsn_t lsn)
168{ 149{
169 /* 150 /*
170 * We always re-log the entire dquot when it becomes dirty, 151 * We always re-log the entire dquot when it becomes dirty,
171 * so, the latest copy _is_ the only one that matters. 152 * so, the latest copy _is_ the only one that matters.
172 */ 153 */
173 return (lsn); 154 return lsn;
174} 155}
175 156
176
177/* 157/*
178 * This is called to wait for the given dquot to be unpinned. 158 * This is called to wait for the given dquot to be unpinned.
179 * Most of these pin/unpin routines are plagiarized from inode code. 159 * Most of these pin/unpin routines are plagiarized from inode code.
180 */ 160 */
181void 161void
182xfs_qm_dqunpin_wait( 162xfs_qm_dqunpin_wait(
183 xfs_dquot_t *dqp) 163 struct xfs_dquot *dqp)
184{ 164{
185 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 165 ASSERT(XFS_DQ_IS_LOCKED(dqp));
186 if (atomic_read(&dqp->q_pincount) == 0) 166 if (atomic_read(&dqp->q_pincount) == 0)
@@ -206,13 +186,12 @@ xfs_qm_dqunpin_wait(
206 */ 186 */
207STATIC void 187STATIC void
208xfs_qm_dquot_logitem_pushbuf( 188xfs_qm_dquot_logitem_pushbuf(
209 xfs_dq_logitem_t *qip) 189 struct xfs_log_item *lip)
210{ 190{
211 xfs_dquot_t *dqp; 191 struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
212 xfs_mount_t *mp; 192 struct xfs_dquot *dqp = qlip->qli_dquot;
213 xfs_buf_t *bp; 193 struct xfs_buf *bp;
214 194
215 dqp = qip->qli_dquot;
216 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 195 ASSERT(XFS_DQ_IS_LOCKED(dqp));
217 196
218 /* 197 /*
@@ -220,22 +199,20 @@ xfs_qm_dquot_logitem_pushbuf(
220 * inode flush completed and the inode was taken off the AIL. 199 * inode flush completed and the inode was taken off the AIL.
221 * So, just get out. 200 * So, just get out.
222 */ 201 */
223 if (completion_done(&dqp->q_flush) || 202 if (completion_done(&dqp->q_flush) ||
224 ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) { 203 !(lip->li_flags & XFS_LI_IN_AIL)) {
225 xfs_dqunlock(dqp); 204 xfs_dqunlock(dqp);
226 return; 205 return;
227 } 206 }
228 mp = dqp->q_mount; 207
229 bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno, 208 bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno,
230 mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK); 209 dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
231 xfs_dqunlock(dqp); 210 xfs_dqunlock(dqp);
232 if (!bp) 211 if (!bp)
233 return; 212 return;
234 if (XFS_BUF_ISDELAYWRITE(bp)) 213 if (XFS_BUF_ISDELAYWRITE(bp))
235 xfs_buf_delwri_promote(bp); 214 xfs_buf_delwri_promote(bp);
236 xfs_buf_relse(bp); 215 xfs_buf_relse(bp);
237 return;
238
239} 216}
240 217
241/* 218/*
@@ -250,15 +227,14 @@ xfs_qm_dquot_logitem_pushbuf(
250 */ 227 */
251STATIC uint 228STATIC uint
252xfs_qm_dquot_logitem_trylock( 229xfs_qm_dquot_logitem_trylock(
253 xfs_dq_logitem_t *qip) 230 struct xfs_log_item *lip)
254{ 231{
255 xfs_dquot_t *dqp; 232 struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
256 233
257 dqp = qip->qli_dquot;
258 if (atomic_read(&dqp->q_pincount) > 0) 234 if (atomic_read(&dqp->q_pincount) > 0)
259 return XFS_ITEM_PINNED; 235 return XFS_ITEM_PINNED;
260 236
261 if (! xfs_qm_dqlock_nowait(dqp)) 237 if (!xfs_qm_dqlock_nowait(dqp))
262 return XFS_ITEM_LOCKED; 238 return XFS_ITEM_LOCKED;
263 239
264 if (!xfs_dqflock_nowait(dqp)) { 240 if (!xfs_dqflock_nowait(dqp)) {
@@ -269,11 +245,10 @@ xfs_qm_dquot_logitem_trylock(
269 return XFS_ITEM_PUSHBUF; 245 return XFS_ITEM_PUSHBUF;
270 } 246 }
271 247
272 ASSERT(qip->qli_item.li_flags & XFS_LI_IN_AIL); 248 ASSERT(lip->li_flags & XFS_LI_IN_AIL);
273 return XFS_ITEM_SUCCESS; 249 return XFS_ITEM_SUCCESS;
274} 250}
275 251
276
277/* 252/*
278 * Unlock the dquot associated with the log item. 253 * Unlock the dquot associated with the log item.
279 * Clear the fields of the dquot and dquot log item that 254 * Clear the fields of the dquot and dquot log item that
@@ -282,12 +257,10 @@ xfs_qm_dquot_logitem_trylock(
282 */ 257 */
283STATIC void 258STATIC void
284xfs_qm_dquot_logitem_unlock( 259xfs_qm_dquot_logitem_unlock(
285 xfs_dq_logitem_t *ql) 260 struct xfs_log_item *lip)
286{ 261{
287 xfs_dquot_t *dqp; 262 struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
288 263
289 ASSERT(ql != NULL);
290 dqp = ql->qli_dquot;
291 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 264 ASSERT(XFS_DQ_IS_LOCKED(dqp));
292 265
293 /* 266 /*
@@ -304,43 +277,32 @@ xfs_qm_dquot_logitem_unlock(
304 xfs_dqunlock(dqp); 277 xfs_dqunlock(dqp);
305} 278}
306 279
307
308/* 280/*
309 * this needs to stamp an lsn into the dquot, I think. 281 * this needs to stamp an lsn into the dquot, I think.
310 * rpc's that look at user dquot's would then have to 282 * rpc's that look at user dquot's would then have to
311 * push on the dependency recorded in the dquot 283 * push on the dependency recorded in the dquot
312 */ 284 */
313/* ARGSUSED */
314STATIC void 285STATIC void
315xfs_qm_dquot_logitem_committing( 286xfs_qm_dquot_logitem_committing(
316 xfs_dq_logitem_t *l, 287 struct xfs_log_item *lip,
317 xfs_lsn_t lsn) 288 xfs_lsn_t lsn)
318{ 289{
319 return;
320} 290}
321 291
322
323/* 292/*
324 * This is the ops vector for dquots 293 * This is the ops vector for dquots
325 */ 294 */
326static struct xfs_item_ops xfs_dquot_item_ops = { 295static struct xfs_item_ops xfs_dquot_item_ops = {
327 .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_size, 296 .iop_size = xfs_qm_dquot_logitem_size,
328 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 297 .iop_format = xfs_qm_dquot_logitem_format,
329 xfs_qm_dquot_logitem_format, 298 .iop_pin = xfs_qm_dquot_logitem_pin,
330 .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_pin, 299 .iop_unpin = xfs_qm_dquot_logitem_unpin,
331 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_unpin, 300 .iop_trylock = xfs_qm_dquot_logitem_trylock,
332 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*)) 301 .iop_unlock = xfs_qm_dquot_logitem_unlock,
333 xfs_qm_dquot_logitem_unpin_remove, 302 .iop_committed = xfs_qm_dquot_logitem_committed,
334 .iop_trylock = (uint(*)(xfs_log_item_t*)) 303 .iop_push = xfs_qm_dquot_logitem_push,
335 xfs_qm_dquot_logitem_trylock, 304 .iop_pushbuf = xfs_qm_dquot_logitem_pushbuf,
336 .iop_unlock = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_unlock, 305 .iop_committing = xfs_qm_dquot_logitem_committing
337 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
338 xfs_qm_dquot_logitem_committed,
339 .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_push,
340 .iop_pushbuf = (void(*)(xfs_log_item_t*))
341 xfs_qm_dquot_logitem_pushbuf,
342 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
343 xfs_qm_dquot_logitem_committing
344}; 306};
345 307
346/* 308/*
@@ -350,10 +312,9 @@ static struct xfs_item_ops xfs_dquot_item_ops = {
350 */ 312 */
351void 313void
352xfs_qm_dquot_logitem_init( 314xfs_qm_dquot_logitem_init(
353 struct xfs_dquot *dqp) 315 struct xfs_dquot *dqp)
354{ 316{
355 xfs_dq_logitem_t *lp; 317 struct xfs_dq_logitem *lp = &dqp->q_logitem;
356 lp = &dqp->q_logitem;
357 318
358 xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT, 319 xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT,
359 &xfs_dquot_item_ops); 320 &xfs_dquot_item_ops);
@@ -374,16 +335,22 @@ xfs_qm_dquot_logitem_init(
374 335
375/*------------------ QUOTAOFF LOG ITEMS -------------------*/ 336/*------------------ QUOTAOFF LOG ITEMS -------------------*/
376 337
338static inline struct xfs_qoff_logitem *QOFF_ITEM(struct xfs_log_item *lip)
339{
340 return container_of(lip, struct xfs_qoff_logitem, qql_item);
341}
342
343
377/* 344/*
378 * This returns the number of iovecs needed to log the given quotaoff item. 345 * This returns the number of iovecs needed to log the given quotaoff item.
379 * We only need 1 iovec for an quotaoff item. It just logs the 346 * We only need 1 iovec for an quotaoff item. It just logs the
380 * quotaoff_log_format structure. 347 * quotaoff_log_format structure.
381 */ 348 */
382/*ARGSUSED*/
383STATIC uint 349STATIC uint
384xfs_qm_qoff_logitem_size(xfs_qoff_logitem_t *qf) 350xfs_qm_qoff_logitem_size(
351 struct xfs_log_item *lip)
385{ 352{
386 return (1); 353 return 1;
387} 354}
388 355
389/* 356/*
@@ -394,53 +361,46 @@ xfs_qm_qoff_logitem_size(xfs_qoff_logitem_t *qf)
394 * slots in the quotaoff item have been filled. 361 * slots in the quotaoff item have been filled.
395 */ 362 */
396STATIC void 363STATIC void
397xfs_qm_qoff_logitem_format(xfs_qoff_logitem_t *qf, 364xfs_qm_qoff_logitem_format(
398 xfs_log_iovec_t *log_vector) 365 struct xfs_log_item *lip,
366 struct xfs_log_iovec *log_vector)
399{ 367{
400 ASSERT(qf->qql_format.qf_type == XFS_LI_QUOTAOFF); 368 struct xfs_qoff_logitem *qflip = QOFF_ITEM(lip);
369
370 ASSERT(qflip->qql_format.qf_type == XFS_LI_QUOTAOFF);
401 371
402 log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format); 372 log_vector->i_addr = &qflip->qql_format;
403 log_vector->i_len = sizeof(xfs_qoff_logitem_t); 373 log_vector->i_len = sizeof(xfs_qoff_logitem_t);
404 log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF; 374 log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF;
405 qf->qql_format.qf_size = 1; 375 qflip->qql_format.qf_size = 1;
406} 376}
407 377
408
409/* 378/*
410 * Pinning has no meaning for an quotaoff item, so just return. 379 * Pinning has no meaning for an quotaoff item, so just return.
411 */ 380 */
412/*ARGSUSED*/
413STATIC void 381STATIC void
414xfs_qm_qoff_logitem_pin(xfs_qoff_logitem_t *qf) 382xfs_qm_qoff_logitem_pin(
383 struct xfs_log_item *lip)
415{ 384{
416 return;
417} 385}
418 386
419
420/* 387/*
421 * Since pinning has no meaning for an quotaoff item, unpinning does 388 * Since pinning has no meaning for an quotaoff item, unpinning does
422 * not either. 389 * not either.
423 */ 390 */
424/*ARGSUSED*/
425STATIC void 391STATIC void
426xfs_qm_qoff_logitem_unpin(xfs_qoff_logitem_t *qf) 392xfs_qm_qoff_logitem_unpin(
393 struct xfs_log_item *lip,
394 int remove)
427{ 395{
428 return;
429}
430
431/*ARGSUSED*/
432STATIC void
433xfs_qm_qoff_logitem_unpin_remove(xfs_qoff_logitem_t *qf, xfs_trans_t *tp)
434{
435 return;
436} 396}
437 397
438/* 398/*
439 * Quotaoff items have no locking, so just return success. 399 * Quotaoff items have no locking, so just return success.
440 */ 400 */
441/*ARGSUSED*/
442STATIC uint 401STATIC uint
443xfs_qm_qoff_logitem_trylock(xfs_qoff_logitem_t *qf) 402xfs_qm_qoff_logitem_trylock(
403 struct xfs_log_item *lip)
444{ 404{
445 return XFS_ITEM_LOCKED; 405 return XFS_ITEM_LOCKED;
446} 406}
@@ -449,53 +409,51 @@ xfs_qm_qoff_logitem_trylock(xfs_qoff_logitem_t *qf)
449 * Quotaoff items have no locking or pushing, so return failure 409 * Quotaoff items have no locking or pushing, so return failure
450 * so that the caller doesn't bother with us. 410 * so that the caller doesn't bother with us.
451 */ 411 */
452/*ARGSUSED*/
453STATIC void 412STATIC void
454xfs_qm_qoff_logitem_unlock(xfs_qoff_logitem_t *qf) 413xfs_qm_qoff_logitem_unlock(
414 struct xfs_log_item *lip)
455{ 415{
456 return;
457} 416}
458 417
459/* 418/*
460 * The quotaoff-start-item is logged only once and cannot be moved in the log, 419 * The quotaoff-start-item is logged only once and cannot be moved in the log,
461 * so simply return the lsn at which it's been logged. 420 * so simply return the lsn at which it's been logged.
462 */ 421 */
463/*ARGSUSED*/
464STATIC xfs_lsn_t 422STATIC xfs_lsn_t
465xfs_qm_qoff_logitem_committed(xfs_qoff_logitem_t *qf, xfs_lsn_t lsn) 423xfs_qm_qoff_logitem_committed(
424 struct xfs_log_item *lip,
425 xfs_lsn_t lsn)
466{ 426{
467 return (lsn); 427 return lsn;
468} 428}
469 429
470/* 430/*
471 * There isn't much you can do to push on an quotaoff item. It is simply 431 * There isn't much you can do to push on an quotaoff item. It is simply
472 * stuck waiting for the log to be flushed to disk. 432 * stuck waiting for the log to be flushed to disk.
473 */ 433 */
474/*ARGSUSED*/
475STATIC void 434STATIC void
476xfs_qm_qoff_logitem_push(xfs_qoff_logitem_t *qf) 435xfs_qm_qoff_logitem_push(
436 struct xfs_log_item *lip)
477{ 437{
478 return;
479} 438}
480 439
481 440
482/*ARGSUSED*/
483STATIC xfs_lsn_t 441STATIC xfs_lsn_t
484xfs_qm_qoffend_logitem_committed( 442xfs_qm_qoffend_logitem_committed(
485 xfs_qoff_logitem_t *qfe, 443 struct xfs_log_item *lip,
486 xfs_lsn_t lsn) 444 xfs_lsn_t lsn)
487{ 445{
488 xfs_qoff_logitem_t *qfs; 446 struct xfs_qoff_logitem *qfe = QOFF_ITEM(lip);
489 struct xfs_ail *ailp; 447 struct xfs_qoff_logitem *qfs = qfe->qql_start_lip;
448 struct xfs_ail *ailp = qfs->qql_item.li_ailp;
490 449
491 qfs = qfe->qql_start_lip;
492 ailp = qfs->qql_item.li_ailp;
493 spin_lock(&ailp->xa_lock);
494 /* 450 /*
495 * Delete the qoff-start logitem from the AIL. 451 * Delete the qoff-start logitem from the AIL.
496 * xfs_trans_ail_delete() drops the AIL lock. 452 * xfs_trans_ail_delete() drops the AIL lock.
497 */ 453 */
454 spin_lock(&ailp->xa_lock);
498 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs); 455 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs);
456
499 kmem_free(qfs); 457 kmem_free(qfs);
500 kmem_free(qfe); 458 kmem_free(qfe);
501 return (xfs_lsn_t)-1; 459 return (xfs_lsn_t)-1;
@@ -515,71 +473,52 @@ xfs_qm_qoffend_logitem_committed(
515 * (truly makes the quotaoff irrevocable). If we do something else, 473 * (truly makes the quotaoff irrevocable). If we do something else,
516 * then maybe we don't need two. 474 * then maybe we don't need two.
517 */ 475 */
518/* ARGSUSED */
519STATIC void
520xfs_qm_qoff_logitem_committing(xfs_qoff_logitem_t *qip, xfs_lsn_t commit_lsn)
521{
522 return;
523}
524
525/* ARGSUSED */
526STATIC void 476STATIC void
527xfs_qm_qoffend_logitem_committing(xfs_qoff_logitem_t *qip, xfs_lsn_t commit_lsn) 477xfs_qm_qoff_logitem_committing(
478 struct xfs_log_item *lip,
479 xfs_lsn_t commit_lsn)
528{ 480{
529 return;
530} 481}
531 482
532static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { 483static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
533 .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size, 484 .iop_size = xfs_qm_qoff_logitem_size,
534 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 485 .iop_format = xfs_qm_qoff_logitem_format,
535 xfs_qm_qoff_logitem_format, 486 .iop_pin = xfs_qm_qoff_logitem_pin,
536 .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin, 487 .iop_unpin = xfs_qm_qoff_logitem_unpin,
537 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unpin, 488 .iop_trylock = xfs_qm_qoff_logitem_trylock,
538 .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*)) 489 .iop_unlock = xfs_qm_qoff_logitem_unlock,
539 xfs_qm_qoff_logitem_unpin_remove, 490 .iop_committed = xfs_qm_qoffend_logitem_committed,
540 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock, 491 .iop_push = xfs_qm_qoff_logitem_push,
541 .iop_unlock = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unlock, 492 .iop_committing = xfs_qm_qoff_logitem_committing
542 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
543 xfs_qm_qoffend_logitem_committed,
544 .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push,
545 .iop_pushbuf = NULL,
546 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
547 xfs_qm_qoffend_logitem_committing
548}; 493};
549 494
550/* 495/*
551 * This is the ops vector shared by all quotaoff-start log items. 496 * This is the ops vector shared by all quotaoff-start log items.
552 */ 497 */
553static struct xfs_item_ops xfs_qm_qoff_logitem_ops = { 498static struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
554 .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size, 499 .iop_size = xfs_qm_qoff_logitem_size,
555 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 500 .iop_format = xfs_qm_qoff_logitem_format,
556 xfs_qm_qoff_logitem_format, 501 .iop_pin = xfs_qm_qoff_logitem_pin,
557 .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin, 502 .iop_unpin = xfs_qm_qoff_logitem_unpin,
558 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unpin, 503 .iop_trylock = xfs_qm_qoff_logitem_trylock,
559 .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*)) 504 .iop_unlock = xfs_qm_qoff_logitem_unlock,
560 xfs_qm_qoff_logitem_unpin_remove, 505 .iop_committed = xfs_qm_qoff_logitem_committed,
561 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock, 506 .iop_push = xfs_qm_qoff_logitem_push,
562 .iop_unlock = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unlock, 507 .iop_committing = xfs_qm_qoff_logitem_committing
563 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
564 xfs_qm_qoff_logitem_committed,
565 .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push,
566 .iop_pushbuf = NULL,
567 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
568 xfs_qm_qoff_logitem_committing
569}; 508};
570 509
571/* 510/*
572 * Allocate and initialize an quotaoff item of the correct quota type(s). 511 * Allocate and initialize an quotaoff item of the correct quota type(s).
573 */ 512 */
574xfs_qoff_logitem_t * 513struct xfs_qoff_logitem *
575xfs_qm_qoff_logitem_init( 514xfs_qm_qoff_logitem_init(
576 struct xfs_mount *mp, 515 struct xfs_mount *mp,
577 xfs_qoff_logitem_t *start, 516 struct xfs_qoff_logitem *start,
578 uint flags) 517 uint flags)
579{ 518{
580 xfs_qoff_logitem_t *qf; 519 struct xfs_qoff_logitem *qf;
581 520
582 qf = (xfs_qoff_logitem_t*) kmem_zalloc(sizeof(xfs_qoff_logitem_t), KM_SLEEP); 521 qf = kmem_zalloc(sizeof(struct xfs_qoff_logitem), KM_SLEEP);
583 522
584 xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ? 523 xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ?
585 &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops); 524 &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops);
@@ -587,5 +526,5 @@ xfs_qm_qoff_logitem_init(
587 qf->qql_format.qf_type = XFS_LI_QUOTAOFF; 526 qf->qql_format.qf_type = XFS_LI_QUOTAOFF;
588 qf->qql_format.qf_flags = flags; 527 qf->qql_format.qf_flags = flags;
589 qf->qql_start_lip = start; 528 qf->qql_start_lip = start;
590 return (qf); 529 return qf;
591} 530}
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 67c018392d62..9a92407109a1 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -23,25 +23,18 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_alloc.h" 26#include "xfs_alloc.h"
28#include "xfs_dmapi.h"
29#include "xfs_quota.h" 27#include "xfs_quota.h"
30#include "xfs_mount.h" 28#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h" 31#include "xfs_dinode.h"
37#include "xfs_inode.h" 32#include "xfs_inode.h"
38#include "xfs_btree.h"
39#include "xfs_ialloc.h" 33#include "xfs_ialloc.h"
40#include "xfs_itable.h" 34#include "xfs_itable.h"
41#include "xfs_rtalloc.h" 35#include "xfs_rtalloc.h"
42#include "xfs_error.h" 36#include "xfs_error.h"
43#include "xfs_bmap.h" 37#include "xfs_bmap.h"
44#include "xfs_rw.h"
45#include "xfs_attr.h" 38#include "xfs_attr.h"
46#include "xfs_buf_item.h" 39#include "xfs_buf_item.h"
47#include "xfs_trans_space.h" 40#include "xfs_trans_space.h"
@@ -1497,7 +1490,7 @@ xfs_qm_dqiterate(
1497 maxlblkcnt - lblkno, 1490 maxlblkcnt - lblkno,
1498 XFS_BMAPI_METADATA, 1491 XFS_BMAPI_METADATA,
1499 NULL, 1492 NULL,
1500 0, map, &nmaps, NULL, NULL); 1493 0, map, &nmaps, NULL);
1501 xfs_iunlock(qip, XFS_ILOCK_SHARED); 1494 xfs_iunlock(qip, XFS_ILOCK_SHARED);
1502 if (error) 1495 if (error)
1503 break; 1496 break;
@@ -1669,7 +1662,8 @@ xfs_qm_dqusage_adjust(
1669 * making us disable quotas for the file system. 1662 * making us disable quotas for the file system.
1670 */ 1663 */
1671 if ((error = xfs_qm_dqget_noattach(ip, &udqp, &gdqp))) { 1664 if ((error = xfs_qm_dqget_noattach(ip, &udqp, &gdqp))) {
1672 xfs_iput(ip, XFS_ILOCK_EXCL); 1665 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1666 IRELE(ip);
1673 *res = BULKSTAT_RV_GIVEUP; 1667 *res = BULKSTAT_RV_GIVEUP;
1674 return error; 1668 return error;
1675 } 1669 }
@@ -1682,7 +1676,8 @@ xfs_qm_dqusage_adjust(
1682 * Walk thru the extent list and count the realtime blocks. 1676 * Walk thru the extent list and count the realtime blocks.
1683 */ 1677 */
1684 if ((error = xfs_qm_get_rtblks(ip, &rtblks))) { 1678 if ((error = xfs_qm_get_rtblks(ip, &rtblks))) {
1685 xfs_iput(ip, XFS_ILOCK_EXCL); 1679 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1680 IRELE(ip);
1686 if (udqp) 1681 if (udqp)
1687 xfs_qm_dqput(udqp); 1682 xfs_qm_dqput(udqp);
1688 if (gdqp) 1683 if (gdqp)
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index 97b410c12794..bea02d786c5d 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -23,25 +23,15 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_alloc.h" 26#include "xfs_alloc.h"
28#include "xfs_dmapi.h"
29#include "xfs_quota.h" 27#include "xfs_quota.h"
30#include "xfs_mount.h" 28#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h"
37#include "xfs_inode.h" 30#include "xfs_inode.h"
38#include "xfs_ialloc.h"
39#include "xfs_itable.h" 31#include "xfs_itable.h"
40#include "xfs_btree.h"
41#include "xfs_bmap.h" 32#include "xfs_bmap.h"
42#include "xfs_rtalloc.h" 33#include "xfs_rtalloc.h"
43#include "xfs_error.h" 34#include "xfs_error.h"
44#include "xfs_rw.h"
45#include "xfs_attr.h" 35#include "xfs_attr.h"
46#include "xfs_buf_item.h" 36#include "xfs_buf_item.h"
47#include "xfs_qm.h" 37#include "xfs_qm.h"
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c
index 3d1fc79532e2..8671a0b32644 100644
--- a/fs/xfs/quota/xfs_qm_stats.c
+++ b/fs/xfs/quota/xfs_qm_stats.c
@@ -23,25 +23,15 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_alloc.h" 26#include "xfs_alloc.h"
28#include "xfs_dmapi.h"
29#include "xfs_quota.h" 27#include "xfs_quota.h"
30#include "xfs_mount.h" 28#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h"
37#include "xfs_inode.h" 30#include "xfs_inode.h"
38#include "xfs_ialloc.h"
39#include "xfs_itable.h" 31#include "xfs_itable.h"
40#include "xfs_bmap.h" 32#include "xfs_bmap.h"
41#include "xfs_btree.h"
42#include "xfs_rtalloc.h" 33#include "xfs_rtalloc.h"
43#include "xfs_error.h" 34#include "xfs_error.h"
44#include "xfs_rw.h"
45#include "xfs_attr.h" 35#include "xfs_attr.h"
46#include "xfs_buf_item.h" 36#include "xfs_buf_item.h"
47#include "xfs_qm.h" 37#include "xfs_qm.h"
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index b4487764e923..45e5849df238 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -26,25 +26,15 @@
26#include "xfs_trans.h" 26#include "xfs_trans.h"
27#include "xfs_sb.h" 27#include "xfs_sb.h"
28#include "xfs_ag.h" 28#include "xfs_ag.h"
29#include "xfs_dir2.h"
30#include "xfs_alloc.h" 29#include "xfs_alloc.h"
31#include "xfs_dmapi.h"
32#include "xfs_quota.h" 30#include "xfs_quota.h"
33#include "xfs_mount.h" 31#include "xfs_mount.h"
34#include "xfs_bmap_btree.h" 32#include "xfs_bmap_btree.h"
35#include "xfs_alloc_btree.h"
36#include "xfs_ialloc_btree.h"
37#include "xfs_dir2_sf.h"
38#include "xfs_attr_sf.h"
39#include "xfs_dinode.h"
40#include "xfs_inode.h" 33#include "xfs_inode.h"
41#include "xfs_ialloc.h"
42#include "xfs_itable.h" 34#include "xfs_itable.h"
43#include "xfs_bmap.h" 35#include "xfs_bmap.h"
44#include "xfs_btree.h"
45#include "xfs_rtalloc.h" 36#include "xfs_rtalloc.h"
46#include "xfs_error.h" 37#include "xfs_error.h"
47#include "xfs_rw.h"
48#include "xfs_attr.h" 38#include "xfs_attr.h"
49#include "xfs_buf_item.h" 39#include "xfs_buf_item.h"
50#include "xfs_utils.h" 40#include "xfs_utils.h"
@@ -248,40 +238,74 @@ out_unlock:
248 return error; 238 return error;
249} 239}
250 240
241STATIC int
242xfs_qm_scall_trunc_qfile(
243 struct xfs_mount *mp,
244 xfs_ino_t ino)
245{
246 struct xfs_inode *ip;
247 struct xfs_trans *tp;
248 int error;
249
250 if (ino == NULLFSINO)
251 return 0;
252
253 error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
254 if (error)
255 return error;
256
257 xfs_ilock(ip, XFS_IOLOCK_EXCL);
258
259 tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE);
260 error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
261 XFS_TRANS_PERM_LOG_RES,
262 XFS_ITRUNCATE_LOG_COUNT);
263 if (error) {
264 xfs_trans_cancel(tp, 0);
265 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
266 goto out_put;
267 }
268
269 xfs_ilock(ip, XFS_ILOCK_EXCL);
270 xfs_trans_ijoin(tp, ip);
271
272 error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK, 1);
273 if (error) {
274 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
275 XFS_TRANS_ABORT);
276 goto out_unlock;
277 }
278
279 xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
280 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
281
282out_unlock:
283 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
284out_put:
285 IRELE(ip);
286 return error;
287}
288
251int 289int
252xfs_qm_scall_trunc_qfiles( 290xfs_qm_scall_trunc_qfiles(
253 xfs_mount_t *mp, 291 xfs_mount_t *mp,
254 uint flags) 292 uint flags)
255{ 293{
256 int error = 0, error2 = 0; 294 int error = 0, error2 = 0;
257 xfs_inode_t *qip;
258 295
259 if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { 296 if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) {
260 qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags); 297 qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags);
261 return XFS_ERROR(EINVAL); 298 return XFS_ERROR(EINVAL);
262 } 299 }
263 300
264 if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) { 301 if (flags & XFS_DQ_USER)
265 error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip); 302 error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_uquotino);
266 if (!error) { 303 if (flags & (XFS_DQ_GROUP|XFS_DQ_PROJ))
267 error = xfs_truncate_file(mp, qip); 304 error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino);
268 IRELE(qip);
269 }
270 }
271
272 if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) &&
273 mp->m_sb.sb_gquotino != NULLFSINO) {
274 error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip);
275 if (!error2) {
276 error2 = xfs_truncate_file(mp, qip);
277 IRELE(qip);
278 }
279 }
280 305
281 return error ? error : error2; 306 return error ? error : error2;
282} 307}
283 308
284
285/* 309/*
286 * Switch on (a given) quota enforcement for a filesystem. This takes 310 * Switch on (a given) quota enforcement for a filesystem. This takes
287 * effect immediately. 311 * effect immediately.
@@ -786,9 +810,9 @@ xfs_qm_export_dquot(
786 } 810 }
787 811
788#ifdef DEBUG 812#ifdef DEBUG
789 if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == XFS_USER_QUOTA) || 813 if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == FS_USER_QUOTA) ||
790 (XFS_IS_OQUOTA_ENFORCED(mp) && 814 (XFS_IS_OQUOTA_ENFORCED(mp) &&
791 (dst->d_flags & (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)))) && 815 (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) &&
792 dst->d_id != 0) { 816 dst->d_id != 0) {
793 if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) && 817 if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) &&
794 (dst->d_blk_softlimit > 0)) { 818 (dst->d_blk_softlimit > 0)) {
@@ -809,17 +833,17 @@ xfs_qm_export_qtype_flags(
809 /* 833 /*
810 * Can't be more than one, or none. 834 * Can't be more than one, or none.
811 */ 835 */
812 ASSERT((flags & (XFS_PROJ_QUOTA | XFS_USER_QUOTA)) != 836 ASSERT((flags & (FS_PROJ_QUOTA | FS_USER_QUOTA)) !=
813 (XFS_PROJ_QUOTA | XFS_USER_QUOTA)); 837 (FS_PROJ_QUOTA | FS_USER_QUOTA));
814 ASSERT((flags & (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)) != 838 ASSERT((flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)) !=
815 (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)); 839 (FS_PROJ_QUOTA | FS_GROUP_QUOTA));
816 ASSERT((flags & (XFS_USER_QUOTA | XFS_GROUP_QUOTA)) != 840 ASSERT((flags & (FS_USER_QUOTA | FS_GROUP_QUOTA)) !=
817 (XFS_USER_QUOTA | XFS_GROUP_QUOTA)); 841 (FS_USER_QUOTA | FS_GROUP_QUOTA));
818 ASSERT((flags & (XFS_PROJ_QUOTA|XFS_USER_QUOTA|XFS_GROUP_QUOTA)) != 0); 842 ASSERT((flags & (FS_PROJ_QUOTA|FS_USER_QUOTA|FS_GROUP_QUOTA)) != 0);
819 843
820 return (flags & XFS_DQ_USER) ? 844 return (flags & XFS_DQ_USER) ?
821 XFS_USER_QUOTA : (flags & XFS_DQ_PROJ) ? 845 FS_USER_QUOTA : (flags & XFS_DQ_PROJ) ?
822 XFS_PROJ_QUOTA : XFS_GROUP_QUOTA; 846 FS_PROJ_QUOTA : FS_GROUP_QUOTA;
823} 847}
824 848
825STATIC uint 849STATIC uint
@@ -830,16 +854,16 @@ xfs_qm_export_flags(
830 854
831 uflags = 0; 855 uflags = 0;
832 if (flags & XFS_UQUOTA_ACCT) 856 if (flags & XFS_UQUOTA_ACCT)
833 uflags |= XFS_QUOTA_UDQ_ACCT; 857 uflags |= FS_QUOTA_UDQ_ACCT;
834 if (flags & XFS_PQUOTA_ACCT) 858 if (flags & XFS_PQUOTA_ACCT)
835 uflags |= XFS_QUOTA_PDQ_ACCT; 859 uflags |= FS_QUOTA_PDQ_ACCT;
836 if (flags & XFS_GQUOTA_ACCT) 860 if (flags & XFS_GQUOTA_ACCT)
837 uflags |= XFS_QUOTA_GDQ_ACCT; 861 uflags |= FS_QUOTA_GDQ_ACCT;
838 if (flags & XFS_UQUOTA_ENFD) 862 if (flags & XFS_UQUOTA_ENFD)
839 uflags |= XFS_QUOTA_UDQ_ENFD; 863 uflags |= FS_QUOTA_UDQ_ENFD;
840 if (flags & (XFS_OQUOTA_ENFD)) { 864 if (flags & (XFS_OQUOTA_ENFD)) {
841 uflags |= (flags & XFS_GQUOTA_ACCT) ? 865 uflags |= (flags & XFS_GQUOTA_ACCT) ?
842 XFS_QUOTA_GDQ_ENFD : XFS_QUOTA_PDQ_ENFD; 866 FS_QUOTA_GDQ_ENFD : FS_QUOTA_PDQ_ENFD;
843 } 867 }
844 return (uflags); 868 return (uflags);
845} 869}
@@ -875,8 +899,9 @@ xfs_dqrele_inode(
875 xfs_qm_dqrele(ip->i_gdquot); 899 xfs_qm_dqrele(ip->i_gdquot);
876 ip->i_gdquot = NULL; 900 ip->i_gdquot = NULL;
877 } 901 }
878 xfs_iput(ip, XFS_ILOCK_EXCL); 902 xfs_iunlock(ip, XFS_ILOCK_EXCL);
879 903
904 IRELE(ip);
880 return 0; 905 return 0;
881} 906}
882 907
@@ -1143,7 +1168,8 @@ xfs_qm_internalqcheck_adjust(
1143 * of those now. 1168 * of those now.
1144 */ 1169 */
1145 if (! ipreleased) { 1170 if (! ipreleased) {
1146 xfs_iput(ip, lock_flags); 1171 xfs_iunlock(ip, lock_flags);
1172 IRELE(ip);
1147 ipreleased = B_TRUE; 1173 ipreleased = B_TRUE;
1148 goto again; 1174 goto again;
1149 } 1175 }
@@ -1160,7 +1186,8 @@ xfs_qm_internalqcheck_adjust(
1160 ASSERT(gd); 1186 ASSERT(gd);
1161 xfs_qm_internalqcheck_dqadjust(ip, gd); 1187 xfs_qm_internalqcheck_dqadjust(ip, gd);
1162 } 1188 }
1163 xfs_iput(ip, lock_flags); 1189 xfs_iunlock(ip, lock_flags);
1190 IRELE(ip);
1164 *res = BULKSTAT_RV_DIDONE; 1191 *res = BULKSTAT_RV_DIDONE;
1165 return (0); 1192 return (0);
1166} 1193}
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 061d827da33c..7de91d1b75c0 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -23,25 +23,15 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_alloc.h" 26#include "xfs_alloc.h"
28#include "xfs_dmapi.h"
29#include "xfs_quota.h" 27#include "xfs_quota.h"
30#include "xfs_mount.h" 28#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dir2_sf.h"
36#include "xfs_dinode.h"
37#include "xfs_inode.h" 30#include "xfs_inode.h"
38#include "xfs_ialloc.h"
39#include "xfs_itable.h" 31#include "xfs_itable.h"
40#include "xfs_btree.h"
41#include "xfs_bmap.h" 32#include "xfs_bmap.h"
42#include "xfs_rtalloc.h" 33#include "xfs_rtalloc.h"
43#include "xfs_error.h" 34#include "xfs_error.h"
44#include "xfs_rw.h"
45#include "xfs_attr.h" 35#include "xfs_attr.h"
46#include "xfs_buf_item.h" 36#include "xfs_buf_item.h"
47#include "xfs_trans_priv.h" 37#include "xfs_trans_priv.h"
@@ -59,16 +49,14 @@ xfs_trans_dqjoin(
59 xfs_trans_t *tp, 49 xfs_trans_t *tp,
60 xfs_dquot_t *dqp) 50 xfs_dquot_t *dqp)
61{ 51{
62 xfs_dq_logitem_t *lp = &dqp->q_logitem;
63
64 ASSERT(dqp->q_transp != tp); 52 ASSERT(dqp->q_transp != tp);
65 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 53 ASSERT(XFS_DQ_IS_LOCKED(dqp));
66 ASSERT(lp->qli_dquot == dqp); 54 ASSERT(dqp->q_logitem.qli_dquot == dqp);
67 55
68 /* 56 /*
69 * Get a log_item_desc to point at the new item. 57 * Get a log_item_desc to point at the new item.
70 */ 58 */
71 (void) xfs_trans_add_item(tp, (xfs_log_item_t*)(lp)); 59 xfs_trans_add_item(tp, &dqp->q_logitem.qli_item);
72 60
73 /* 61 /*
74 * Initialize i_transp so we can later determine if this dquot is 62 * Initialize i_transp so we can later determine if this dquot is
@@ -93,16 +81,11 @@ xfs_trans_log_dquot(
93 xfs_trans_t *tp, 81 xfs_trans_t *tp,
94 xfs_dquot_t *dqp) 82 xfs_dquot_t *dqp)
95{ 83{
96 xfs_log_item_desc_t *lidp;
97
98 ASSERT(dqp->q_transp == tp); 84 ASSERT(dqp->q_transp == tp);
99 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 85 ASSERT(XFS_DQ_IS_LOCKED(dqp));
100 86
101 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(&dqp->q_logitem));
102 ASSERT(lidp != NULL);
103
104 tp->t_flags |= XFS_TRANS_DIRTY; 87 tp->t_flags |= XFS_TRANS_DIRTY;
105 lidp->lid_flags |= XFS_LID_DIRTY; 88 dqp->q_logitem.qli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
106} 89}
107 90
108/* 91/*
@@ -874,9 +857,8 @@ xfs_trans_get_qoff_item(
874 /* 857 /*
875 * Get a log_item_desc to point at the new item. 858 * Get a log_item_desc to point at the new item.
876 */ 859 */
877 (void) xfs_trans_add_item(tp, (xfs_log_item_t*)q); 860 xfs_trans_add_item(tp, &q->qql_item);
878 861 return q;
879 return (q);
880} 862}
881 863
882 864
@@ -890,13 +872,8 @@ xfs_trans_log_quotaoff_item(
890 xfs_trans_t *tp, 872 xfs_trans_t *tp,
891 xfs_qoff_logitem_t *qlp) 873 xfs_qoff_logitem_t *qlp)
892{ 874{
893 xfs_log_item_desc_t *lidp;
894
895 lidp = xfs_trans_find_item(tp, (xfs_log_item_t *)qlp);
896 ASSERT(lidp != NULL);
897
898 tp->t_flags |= XFS_TRANS_DIRTY; 875 tp->t_flags |= XFS_TRANS_DIRTY;
899 lidp->lid_flags |= XFS_LID_DIRTY; 876 qlp->qql_item.li_desc->lid_flags |= XFS_LID_DIRTY;
900} 877}
901 878
902STATIC void 879STATIC void
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index 3f3610a7ee05..975aa10e1a47 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -22,7 +22,6 @@
22#include "xfs_sb.h" 22#include "xfs_sb.h"
23#include "xfs_inum.h" 23#include "xfs_inum.h"
24#include "xfs_ag.h" 24#include "xfs_ag.h"
25#include "xfs_dmapi.h"
26#include "xfs_mount.h" 25#include "xfs_mount.h"
27#include "xfs_error.h" 26#include "xfs_error.h"
28 27
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index a7fbe8a99b12..af168faccc7a 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -24,18 +24,13 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_btree.h" 33#include "xfs_btree.h"
38#include "xfs_ialloc.h"
39#include "xfs_alloc.h" 34#include "xfs_alloc.h"
40#include "xfs_error.h" 35#include "xfs_error.h"
41#include "xfs_trace.h" 36#include "xfs_trace.h"
@@ -688,8 +683,6 @@ xfs_alloc_ag_vextent_near(
688 xfs_agblock_t ltbno; /* start bno of left side entry */ 683 xfs_agblock_t ltbno; /* start bno of left side entry */
689 xfs_agblock_t ltbnoa; /* aligned ... */ 684 xfs_agblock_t ltbnoa; /* aligned ... */
690 xfs_extlen_t ltdiff; /* difference to left side entry */ 685 xfs_extlen_t ltdiff; /* difference to left side entry */
691 /*REFERENCED*/
692 xfs_agblock_t ltend; /* end bno of left side entry */
693 xfs_extlen_t ltlen; /* length of left side entry */ 686 xfs_extlen_t ltlen; /* length of left side entry */
694 xfs_extlen_t ltlena; /* aligned ... */ 687 xfs_extlen_t ltlena; /* aligned ... */
695 xfs_agblock_t ltnew; /* useful start bno of left side */ 688 xfs_agblock_t ltnew; /* useful start bno of left side */
@@ -814,8 +807,7 @@ xfs_alloc_ag_vextent_near(
814 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i))) 807 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
815 goto error0; 808 goto error0;
816 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 809 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
817 ltend = ltbno + ltlen; 810 ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
818 ASSERT(ltend <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
819 args->len = blen; 811 args->len = blen;
820 if (!xfs_alloc_fix_minleft(args)) { 812 if (!xfs_alloc_fix_minleft(args)) {
821 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 813 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
@@ -828,7 +820,7 @@ xfs_alloc_ag_vextent_near(
828 */ 820 */
829 args->agbno = bnew; 821 args->agbno = bnew;
830 ASSERT(bnew >= ltbno); 822 ASSERT(bnew >= ltbno);
831 ASSERT(bnew + blen <= ltend); 823 ASSERT(bnew + blen <= ltbno + ltlen);
832 /* 824 /*
833 * Set up a cursor for the by-bno tree. 825 * Set up a cursor for the by-bno tree.
834 */ 826 */
@@ -1157,7 +1149,6 @@ xfs_alloc_ag_vextent_near(
1157 /* 1149 /*
1158 * Fix up the length and compute the useful address. 1150 * Fix up the length and compute the useful address.
1159 */ 1151 */
1160 ltend = ltbno + ltlen;
1161 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 1152 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1162 xfs_alloc_fix_len(args); 1153 xfs_alloc_fix_len(args);
1163 if (!xfs_alloc_fix_minleft(args)) { 1154 if (!xfs_alloc_fix_minleft(args)) {
@@ -1170,7 +1161,7 @@ xfs_alloc_ag_vextent_near(
1170 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno, 1161 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno,
1171 ltlen, &ltnew); 1162 ltlen, &ltnew);
1172 ASSERT(ltnew >= ltbno); 1163 ASSERT(ltnew >= ltbno);
1173 ASSERT(ltnew + rlen <= ltend); 1164 ASSERT(ltnew + rlen <= ltbno + ltlen);
1174 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 1165 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1175 args->agbno = ltnew; 1166 args->agbno = ltnew;
1176 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, 1167 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 6d05199b667c..895009a97271 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -27,16 +27,16 @@ struct xfs_busy_extent;
27/* 27/*
28 * Freespace allocation types. Argument to xfs_alloc_[v]extent. 28 * Freespace allocation types. Argument to xfs_alloc_[v]extent.
29 */ 29 */
30typedef enum xfs_alloctype 30#define XFS_ALLOCTYPE_ANY_AG 0x01 /* allocate anywhere, use rotor */
31{ 31#define XFS_ALLOCTYPE_FIRST_AG 0x02 /* ... start at ag 0 */
32 XFS_ALLOCTYPE_ANY_AG, /* allocate anywhere, use rotor */ 32#define XFS_ALLOCTYPE_START_AG 0x04 /* anywhere, start in this a.g. */
33 XFS_ALLOCTYPE_FIRST_AG, /* ... start at ag 0 */ 33#define XFS_ALLOCTYPE_THIS_AG 0x08 /* anywhere in this a.g. */
34 XFS_ALLOCTYPE_START_AG, /* anywhere, start in this a.g. */ 34#define XFS_ALLOCTYPE_START_BNO 0x10 /* near this block else anywhere */
35 XFS_ALLOCTYPE_THIS_AG, /* anywhere in this a.g. */ 35#define XFS_ALLOCTYPE_NEAR_BNO 0x20 /* in this a.g. and near this block */
36 XFS_ALLOCTYPE_START_BNO, /* near this block else anywhere */ 36#define XFS_ALLOCTYPE_THIS_BNO 0x40 /* at exactly this block */
37 XFS_ALLOCTYPE_NEAR_BNO, /* in this a.g. and near this block */ 37
38 XFS_ALLOCTYPE_THIS_BNO /* at exactly this block */ 38/* this should become an enum again when the tracing code is fixed */
39} xfs_alloctype_t; 39typedef unsigned int xfs_alloctype_t;
40 40
41#define XFS_ALLOC_TYPES \ 41#define XFS_ALLOC_TYPES \
42 { XFS_ALLOCTYPE_ANY_AG, "ANY_AG" }, \ 42 { XFS_ALLOCTYPE_ANY_AG, "ANY_AG" }, \
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 83f494218759..97f7328967fd 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -24,19 +24,14 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_btree.h" 33#include "xfs_btree.h"
38#include "xfs_btree_trace.h" 34#include "xfs_btree_trace.h"
39#include "xfs_ialloc.h"
40#include "xfs_alloc.h" 35#include "xfs_alloc.h"
41#include "xfs_error.h" 36#include "xfs_error.h"
42#include "xfs_trace.h" 37#include "xfs_trace.h"
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index b9c196a53c42..c2568242a901 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -25,19 +25,13 @@
25#include "xfs_trans.h" 25#include "xfs_trans.h"
26#include "xfs_sb.h" 26#include "xfs_sb.h"
27#include "xfs_ag.h" 27#include "xfs_ag.h"
28#include "xfs_dir2.h"
29#include "xfs_dmapi.h"
30#include "xfs_mount.h" 28#include "xfs_mount.h"
31#include "xfs_da_btree.h" 29#include "xfs_da_btree.h"
32#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
33#include "xfs_alloc_btree.h"
34#include "xfs_ialloc_btree.h"
35#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h" 31#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 32#include "xfs_dinode.h"
38#include "xfs_inode.h" 33#include "xfs_inode.h"
39#include "xfs_alloc.h" 34#include "xfs_alloc.h"
40#include "xfs_btree.h"
41#include "xfs_inode_item.h" 35#include "xfs_inode_item.h"
42#include "xfs_bmap.h" 36#include "xfs_bmap.h"
43#include "xfs_attr.h" 37#include "xfs_attr.h"
@@ -325,8 +319,7 @@ xfs_attr_set_int(
325 return (error); 319 return (error);
326 } 320 }
327 321
328 xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL); 322 xfs_trans_ijoin(args.trans, dp);
329 xfs_trans_ihold(args.trans, dp);
330 323
331 /* 324 /*
332 * If the attribute list is non-existent or a shortform list, 325 * If the attribute list is non-existent or a shortform list,
@@ -396,10 +389,8 @@ xfs_attr_set_int(
396 * bmap_finish() may have committed the last trans and started 389 * bmap_finish() may have committed the last trans and started
397 * a new one. We need the inode to be in all transactions. 390 * a new one. We need the inode to be in all transactions.
398 */ 391 */
399 if (committed) { 392 if (committed)
400 xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL); 393 xfs_trans_ijoin(args.trans, dp);
401 xfs_trans_ihold(args.trans, dp);
402 }
403 394
404 /* 395 /*
405 * Commit the leaf transformation. We'll need another (linked) 396 * Commit the leaf transformation. We'll need another (linked)
@@ -544,8 +535,7 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
544 * No need to make quota reservations here. We expect to release some 535 * No need to make quota reservations here. We expect to release some
545 * blocks not allocate in the common case. 536 * blocks not allocate in the common case.
546 */ 537 */
547 xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL); 538 xfs_trans_ijoin(args.trans, dp);
548 xfs_trans_ihold(args.trans, dp);
549 539
550 /* 540 /*
551 * Decide on what work routines to call based on the inode size. 541 * Decide on what work routines to call based on the inode size.
@@ -821,8 +811,7 @@ xfs_attr_inactive(xfs_inode_t *dp)
821 * No need to make quota reservations here. We expect to release some 811 * No need to make quota reservations here. We expect to release some
822 * blocks, not allocate, in the common case. 812 * blocks, not allocate, in the common case.
823 */ 813 */
824 xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL); 814 xfs_trans_ijoin(trans, dp);
825 xfs_trans_ihold(trans, dp);
826 815
827 /* 816 /*
828 * Decide on what work routines to call based on the inode size. 817 * Decide on what work routines to call based on the inode size.
@@ -981,10 +970,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
981 * bmap_finish() may have committed the last trans and started 970 * bmap_finish() may have committed the last trans and started
982 * a new one. We need the inode to be in all transactions. 971 * a new one. We need the inode to be in all transactions.
983 */ 972 */
984 if (committed) { 973 if (committed)
985 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); 974 xfs_trans_ijoin(args->trans, dp);
986 xfs_trans_ihold(args->trans, dp);
987 }
988 975
989 /* 976 /*
990 * Commit the current trans (including the inode) and start 977 * Commit the current trans (including the inode) and start
@@ -1085,10 +1072,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
1085 * and started a new one. We need the inode to be 1072 * and started a new one. We need the inode to be
1086 * in all transactions. 1073 * in all transactions.
1087 */ 1074 */
1088 if (committed) { 1075 if (committed)
1089 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); 1076 xfs_trans_ijoin(args->trans, dp);
1090 xfs_trans_ihold(args->trans, dp);
1091 }
1092 } else 1077 } else
1093 xfs_da_buf_done(bp); 1078 xfs_da_buf_done(bp);
1094 1079
@@ -1161,10 +1146,8 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
1161 * bmap_finish() may have committed the last trans and started 1146 * bmap_finish() may have committed the last trans and started
1162 * a new one. We need the inode to be in all transactions. 1147 * a new one. We need the inode to be in all transactions.
1163 */ 1148 */
1164 if (committed) { 1149 if (committed)
1165 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); 1150 xfs_trans_ijoin(args->trans, dp);
1166 xfs_trans_ihold(args->trans, dp);
1167 }
1168 } else 1151 } else
1169 xfs_da_buf_done(bp); 1152 xfs_da_buf_done(bp);
1170 return(0); 1153 return(0);
@@ -1317,10 +1300,8 @@ restart:
1317 * and started a new one. We need the inode to be 1300 * and started a new one. We need the inode to be
1318 * in all transactions. 1301 * in all transactions.
1319 */ 1302 */
1320 if (committed) { 1303 if (committed)
1321 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); 1304 xfs_trans_ijoin(args->trans, dp);
1322 xfs_trans_ihold(args->trans, dp);
1323 }
1324 1305
1325 /* 1306 /*
1326 * Commit the node conversion and start the next 1307 * Commit the node conversion and start the next
@@ -1356,10 +1337,8 @@ restart:
1356 * bmap_finish() may have committed the last trans and started 1337 * bmap_finish() may have committed the last trans and started
1357 * a new one. We need the inode to be in all transactions. 1338 * a new one. We need the inode to be in all transactions.
1358 */ 1339 */
1359 if (committed) { 1340 if (committed)
1360 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); 1341 xfs_trans_ijoin(args->trans, dp);
1361 xfs_trans_ihold(args->trans, dp);
1362 }
1363 } else { 1342 } else {
1364 /* 1343 /*
1365 * Addition succeeded, update Btree hashvals. 1344 * Addition succeeded, update Btree hashvals.
@@ -1470,10 +1449,8 @@ restart:
1470 * and started a new one. We need the inode to be 1449 * and started a new one. We need the inode to be
1471 * in all transactions. 1450 * in all transactions.
1472 */ 1451 */
1473 if (committed) { 1452 if (committed)
1474 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); 1453 xfs_trans_ijoin(args->trans, dp);
1475 xfs_trans_ihold(args->trans, dp);
1476 }
1477 } 1454 }
1478 1455
1479 /* 1456 /*
@@ -1604,10 +1581,8 @@ xfs_attr_node_removename(xfs_da_args_t *args)
1604 * bmap_finish() may have committed the last trans and started 1581 * bmap_finish() may have committed the last trans and started
1605 * a new one. We need the inode to be in all transactions. 1582 * a new one. We need the inode to be in all transactions.
1606 */ 1583 */
1607 if (committed) { 1584 if (committed)
1608 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); 1585 xfs_trans_ijoin(args->trans, dp);
1609 xfs_trans_ihold(args->trans, dp);
1610 }
1611 1586
1612 /* 1587 /*
1613 * Commit the Btree join operation and start a new trans. 1588 * Commit the Btree join operation and start a new trans.
@@ -1658,10 +1633,8 @@ xfs_attr_node_removename(xfs_da_args_t *args)
1658 * and started a new one. We need the inode to be 1633 * and started a new one. We need the inode to be
1659 * in all transactions. 1634 * in all transactions.
1660 */ 1635 */
1661 if (committed) { 1636 if (committed)
1662 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); 1637 xfs_trans_ijoin(args->trans, dp);
1663 xfs_trans_ihold(args->trans, dp);
1664 }
1665 } else 1638 } else
1666 xfs_da_brelse(args->trans, bp); 1639 xfs_da_brelse(args->trans, bp);
1667 } 1640 }
@@ -2004,7 +1977,7 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
2004 error = xfs_bmapi(args->trans, args->dp, (xfs_fileoff_t)lblkno, 1977 error = xfs_bmapi(args->trans, args->dp, (xfs_fileoff_t)lblkno,
2005 args->rmtblkcnt, 1978 args->rmtblkcnt,
2006 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, 1979 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2007 NULL, 0, map, &nmap, NULL, NULL); 1980 NULL, 0, map, &nmap, NULL);
2008 if (error) 1981 if (error)
2009 return(error); 1982 return(error);
2010 ASSERT(nmap >= 1); 1983 ASSERT(nmap >= 1);
@@ -2083,7 +2056,7 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
2083 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA | 2056 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA |
2084 XFS_BMAPI_WRITE, 2057 XFS_BMAPI_WRITE,
2085 args->firstblock, args->total, &map, &nmap, 2058 args->firstblock, args->total, &map, &nmap,
2086 args->flist, NULL); 2059 args->flist);
2087 if (!error) { 2060 if (!error) {
2088 error = xfs_bmap_finish(&args->trans, args->flist, 2061 error = xfs_bmap_finish(&args->trans, args->flist,
2089 &committed); 2062 &committed);
@@ -2099,10 +2072,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
2099 * bmap_finish() may have committed the last trans and started 2072 * bmap_finish() may have committed the last trans and started
2100 * a new one. We need the inode to be in all transactions. 2073 * a new one. We need the inode to be in all transactions.
2101 */ 2074 */
2102 if (committed) { 2075 if (committed)
2103 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); 2076 xfs_trans_ijoin(args->trans, dp);
2104 xfs_trans_ihold(args->trans, dp);
2105 }
2106 2077
2107 ASSERT(nmap == 1); 2078 ASSERT(nmap == 1);
2108 ASSERT((map.br_startblock != DELAYSTARTBLOCK) && 2079 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
@@ -2136,7 +2107,7 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
2136 args->rmtblkcnt, 2107 args->rmtblkcnt,
2137 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, 2108 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2138 args->firstblock, 0, &map, &nmap, 2109 args->firstblock, 0, &map, &nmap,
2139 NULL, NULL); 2110 NULL);
2140 if (error) { 2111 if (error) {
2141 return(error); 2112 return(error);
2142 } 2113 }
@@ -2201,7 +2172,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
2201 args->rmtblkcnt, 2172 args->rmtblkcnt,
2202 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, 2173 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2203 args->firstblock, 0, &map, &nmap, 2174 args->firstblock, 0, &map, &nmap,
2204 args->flist, NULL); 2175 args->flist);
2205 if (error) { 2176 if (error) {
2206 return(error); 2177 return(error);
2207 } 2178 }
@@ -2239,7 +2210,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
2239 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt, 2210 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
2240 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, 2211 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2241 1, args->firstblock, args->flist, 2212 1, args->firstblock, args->flist,
2242 NULL, &done); 2213 &done);
2243 if (!error) { 2214 if (!error) {
2244 error = xfs_bmap_finish(&args->trans, args->flist, 2215 error = xfs_bmap_finish(&args->trans, args->flist,
2245 &committed); 2216 &committed);
@@ -2255,10 +2226,8 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
2255 * bmap_finish() may have committed the last trans and started 2226 * bmap_finish() may have committed the last trans and started
2256 * a new one. We need the inode to be in all transactions. 2227 * a new one. We need the inode to be in all transactions.
2257 */ 2228 */
2258 if (committed) { 2229 if (committed)
2259 xfs_trans_ijoin(args->trans, args->dp, XFS_ILOCK_EXCL); 2230 xfs_trans_ijoin(args->trans, args->dp);
2260 xfs_trans_ihold(args->trans, args->dp);
2261 }
2262 2231
2263 /* 2232 /*
2264 * Close out trans and start the next one in the chain. 2233 * Close out trans and start the next one in the chain.
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index a90ce74fc256..a6cff8edcdb6 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -24,8 +24,6 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
@@ -33,7 +31,6 @@
33#include "xfs_ialloc_btree.h" 31#include "xfs_ialloc_btree.h"
34#include "xfs_alloc.h" 32#include "xfs_alloc.h"
35#include "xfs_btree.h" 33#include "xfs_btree.h"
36#include "xfs_dir2_sf.h"
37#include "xfs_attr_sf.h" 34#include "xfs_attr_sf.h"
38#include "xfs_dinode.h" 35#include "xfs_dinode.h"
39#include "xfs_inode.h" 36#include "xfs_inode.h"
@@ -2931,7 +2928,7 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
2931 nmap = 1; 2928 nmap = 1;
2932 error = xfs_bmapi(*trans, dp, (xfs_fileoff_t)tblkno, tblkcnt, 2929 error = xfs_bmapi(*trans, dp, (xfs_fileoff_t)tblkno, tblkcnt,
2933 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, 2930 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2934 NULL, 0, &map, &nmap, NULL, NULL); 2931 NULL, 0, &map, &nmap, NULL);
2935 if (error) { 2932 if (error) {
2936 return(error); 2933 return(error);
2937 } 2934 }
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 99587ded043f..23f14e595c18 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -30,13 +30,10 @@
30#include "xfs_alloc_btree.h" 30#include "xfs_alloc_btree.h"
31#include "xfs_ialloc_btree.h" 31#include "xfs_ialloc_btree.h"
32#include "xfs_dir2_sf.h" 32#include "xfs_dir2_sf.h"
33#include "xfs_attr_sf.h"
34#include "xfs_dinode.h" 33#include "xfs_dinode.h"
35#include "xfs_inode.h" 34#include "xfs_inode.h"
36#include "xfs_btree.h" 35#include "xfs_btree.h"
37#include "xfs_dmapi.h"
38#include "xfs_mount.h" 36#include "xfs_mount.h"
39#include "xfs_ialloc.h"
40#include "xfs_itable.h" 37#include "xfs_itable.h"
41#include "xfs_dir2_data.h" 38#include "xfs_dir2_data.h"
42#include "xfs_dir2_leaf.h" 39#include "xfs_dir2_leaf.h"
@@ -104,7 +101,6 @@ xfs_bmap_add_extent(
104 xfs_fsblock_t *first, /* pointer to firstblock variable */ 101 xfs_fsblock_t *first, /* pointer to firstblock variable */
105 xfs_bmap_free_t *flist, /* list of extents to be freed */ 102 xfs_bmap_free_t *flist, /* list of extents to be freed */
106 int *logflagsp, /* inode logging flags */ 103 int *logflagsp, /* inode logging flags */
107 xfs_extdelta_t *delta, /* Change made to incore extents */
108 int whichfork, /* data or attr fork */ 104 int whichfork, /* data or attr fork */
109 int rsvd); /* OK to allocate reserved blocks */ 105 int rsvd); /* OK to allocate reserved blocks */
110 106
@@ -122,7 +118,6 @@ xfs_bmap_add_extent_delay_real(
122 xfs_fsblock_t *first, /* pointer to firstblock variable */ 118 xfs_fsblock_t *first, /* pointer to firstblock variable */
123 xfs_bmap_free_t *flist, /* list of extents to be freed */ 119 xfs_bmap_free_t *flist, /* list of extents to be freed */
124 int *logflagsp, /* inode logging flags */ 120 int *logflagsp, /* inode logging flags */
125 xfs_extdelta_t *delta, /* Change made to incore extents */
126 int rsvd); /* OK to allocate reserved blocks */ 121 int rsvd); /* OK to allocate reserved blocks */
127 122
128/* 123/*
@@ -135,7 +130,6 @@ xfs_bmap_add_extent_hole_delay(
135 xfs_extnum_t idx, /* extent number to update/insert */ 130 xfs_extnum_t idx, /* extent number to update/insert */
136 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 131 xfs_bmbt_irec_t *new, /* new data to add to file extents */
137 int *logflagsp,/* inode logging flags */ 132 int *logflagsp,/* inode logging flags */
138 xfs_extdelta_t *delta, /* Change made to incore extents */
139 int rsvd); /* OK to allocate reserved blocks */ 133 int rsvd); /* OK to allocate reserved blocks */
140 134
141/* 135/*
@@ -149,7 +143,6 @@ xfs_bmap_add_extent_hole_real(
149 xfs_btree_cur_t *cur, /* if null, not a btree */ 143 xfs_btree_cur_t *cur, /* if null, not a btree */
150 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 144 xfs_bmbt_irec_t *new, /* new data to add to file extents */
151 int *logflagsp, /* inode logging flags */ 145 int *logflagsp, /* inode logging flags */
152 xfs_extdelta_t *delta, /* Change made to incore extents */
153 int whichfork); /* data or attr fork */ 146 int whichfork); /* data or attr fork */
154 147
155/* 148/*
@@ -162,8 +155,7 @@ xfs_bmap_add_extent_unwritten_real(
162 xfs_extnum_t idx, /* extent number to update/insert */ 155 xfs_extnum_t idx, /* extent number to update/insert */
163 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ 156 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
164 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 157 xfs_bmbt_irec_t *new, /* new data to add to file extents */
165 int *logflagsp, /* inode logging flags */ 158 int *logflagsp); /* inode logging flags */
166 xfs_extdelta_t *delta); /* Change made to incore extents */
167 159
168/* 160/*
169 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. 161 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
@@ -200,7 +192,6 @@ xfs_bmap_del_extent(
200 xfs_btree_cur_t *cur, /* if null, not a btree */ 192 xfs_btree_cur_t *cur, /* if null, not a btree */
201 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 193 xfs_bmbt_irec_t *new, /* new data to add to file extents */
202 int *logflagsp,/* inode logging flags */ 194 int *logflagsp,/* inode logging flags */
203 xfs_extdelta_t *delta, /* Change made to incore extents */
204 int whichfork, /* data or attr fork */ 195 int whichfork, /* data or attr fork */
205 int rsvd); /* OK to allocate reserved blocks */ 196 int rsvd); /* OK to allocate reserved blocks */
206 197
@@ -489,7 +480,6 @@ xfs_bmap_add_extent(
489 xfs_fsblock_t *first, /* pointer to firstblock variable */ 480 xfs_fsblock_t *first, /* pointer to firstblock variable */
490 xfs_bmap_free_t *flist, /* list of extents to be freed */ 481 xfs_bmap_free_t *flist, /* list of extents to be freed */
491 int *logflagsp, /* inode logging flags */ 482 int *logflagsp, /* inode logging flags */
492 xfs_extdelta_t *delta, /* Change made to incore extents */
493 int whichfork, /* data or attr fork */ 483 int whichfork, /* data or attr fork */
494 int rsvd) /* OK to use reserved data blocks */ 484 int rsvd) /* OK to use reserved data blocks */
495{ 485{
@@ -524,15 +514,6 @@ xfs_bmap_add_extent(
524 logflags = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); 514 logflags = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
525 } else 515 } else
526 logflags = 0; 516 logflags = 0;
527 /* DELTA: single new extent */
528 if (delta) {
529 if (delta->xed_startoff > new->br_startoff)
530 delta->xed_startoff = new->br_startoff;
531 if (delta->xed_blockcount <
532 new->br_startoff + new->br_blockcount)
533 delta->xed_blockcount = new->br_startoff +
534 new->br_blockcount;
535 }
536 } 517 }
537 /* 518 /*
538 * Any kind of new delayed allocation goes here. 519 * Any kind of new delayed allocation goes here.
@@ -542,7 +523,7 @@ xfs_bmap_add_extent(
542 ASSERT((cur->bc_private.b.flags & 523 ASSERT((cur->bc_private.b.flags &
543 XFS_BTCUR_BPRV_WASDEL) == 0); 524 XFS_BTCUR_BPRV_WASDEL) == 0);
544 if ((error = xfs_bmap_add_extent_hole_delay(ip, idx, new, 525 if ((error = xfs_bmap_add_extent_hole_delay(ip, idx, new,
545 &logflags, delta, rsvd))) 526 &logflags, rsvd)))
546 goto done; 527 goto done;
547 } 528 }
548 /* 529 /*
@@ -553,7 +534,7 @@ xfs_bmap_add_extent(
553 ASSERT((cur->bc_private.b.flags & 534 ASSERT((cur->bc_private.b.flags &
554 XFS_BTCUR_BPRV_WASDEL) == 0); 535 XFS_BTCUR_BPRV_WASDEL) == 0);
555 if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new, 536 if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new,
556 &logflags, delta, whichfork))) 537 &logflags, whichfork)))
557 goto done; 538 goto done;
558 } else { 539 } else {
559 xfs_bmbt_irec_t prev; /* old extent at offset idx */ 540 xfs_bmbt_irec_t prev; /* old extent at offset idx */
@@ -578,17 +559,17 @@ xfs_bmap_add_extent(
578 XFS_BTCUR_BPRV_WASDEL); 559 XFS_BTCUR_BPRV_WASDEL);
579 if ((error = xfs_bmap_add_extent_delay_real(ip, 560 if ((error = xfs_bmap_add_extent_delay_real(ip,
580 idx, &cur, new, &da_new, first, flist, 561 idx, &cur, new, &da_new, first, flist,
581 &logflags, delta, rsvd))) 562 &logflags, rsvd)))
582 goto done; 563 goto done;
583 } else if (new->br_state == XFS_EXT_NORM) { 564 } else if (new->br_state == XFS_EXT_NORM) {
584 ASSERT(new->br_state == XFS_EXT_NORM); 565 ASSERT(new->br_state == XFS_EXT_NORM);
585 if ((error = xfs_bmap_add_extent_unwritten_real( 566 if ((error = xfs_bmap_add_extent_unwritten_real(
586 ip, idx, &cur, new, &logflags, delta))) 567 ip, idx, &cur, new, &logflags)))
587 goto done; 568 goto done;
588 } else { 569 } else {
589 ASSERT(new->br_state == XFS_EXT_UNWRITTEN); 570 ASSERT(new->br_state == XFS_EXT_UNWRITTEN);
590 if ((error = xfs_bmap_add_extent_unwritten_real( 571 if ((error = xfs_bmap_add_extent_unwritten_real(
591 ip, idx, &cur, new, &logflags, delta))) 572 ip, idx, &cur, new, &logflags)))
592 goto done; 573 goto done;
593 } 574 }
594 ASSERT(*curp == cur || *curp == NULL); 575 ASSERT(*curp == cur || *curp == NULL);
@@ -601,7 +582,7 @@ xfs_bmap_add_extent(
601 ASSERT((cur->bc_private.b.flags & 582 ASSERT((cur->bc_private.b.flags &
602 XFS_BTCUR_BPRV_WASDEL) == 0); 583 XFS_BTCUR_BPRV_WASDEL) == 0);
603 if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, 584 if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur,
604 new, &logflags, delta, whichfork))) 585 new, &logflags, whichfork)))
605 goto done; 586 goto done;
606 } 587 }
607 } 588 }
@@ -666,7 +647,6 @@ xfs_bmap_add_extent_delay_real(
666 xfs_fsblock_t *first, /* pointer to firstblock variable */ 647 xfs_fsblock_t *first, /* pointer to firstblock variable */
667 xfs_bmap_free_t *flist, /* list of extents to be freed */ 648 xfs_bmap_free_t *flist, /* list of extents to be freed */
668 int *logflagsp, /* inode logging flags */ 649 int *logflagsp, /* inode logging flags */
669 xfs_extdelta_t *delta, /* Change made to incore extents */
670 int rsvd) /* OK to use reserved data block allocation */ 650 int rsvd) /* OK to use reserved data block allocation */
671{ 651{
672 xfs_btree_cur_t *cur; /* btree cursor */ 652 xfs_btree_cur_t *cur; /* btree cursor */
@@ -797,11 +777,6 @@ xfs_bmap_add_extent_delay_real(
797 goto done; 777 goto done;
798 } 778 }
799 *dnew = 0; 779 *dnew = 0;
800 /* DELTA: Three in-core extents are replaced by one. */
801 temp = LEFT.br_startoff;
802 temp2 = LEFT.br_blockcount +
803 PREV.br_blockcount +
804 RIGHT.br_blockcount;
805 break; 780 break;
806 781
807 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: 782 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
@@ -832,10 +807,6 @@ xfs_bmap_add_extent_delay_real(
832 goto done; 807 goto done;
833 } 808 }
834 *dnew = 0; 809 *dnew = 0;
835 /* DELTA: Two in-core extents are replaced by one. */
836 temp = LEFT.br_startoff;
837 temp2 = LEFT.br_blockcount +
838 PREV.br_blockcount;
839 break; 810 break;
840 811
841 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: 812 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -867,10 +838,6 @@ xfs_bmap_add_extent_delay_real(
867 goto done; 838 goto done;
868 } 839 }
869 *dnew = 0; 840 *dnew = 0;
870 /* DELTA: Two in-core extents are replaced by one. */
871 temp = PREV.br_startoff;
872 temp2 = PREV.br_blockcount +
873 RIGHT.br_blockcount;
874 break; 841 break;
875 842
876 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: 843 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
@@ -900,9 +867,6 @@ xfs_bmap_add_extent_delay_real(
900 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 867 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
901 } 868 }
902 *dnew = 0; 869 *dnew = 0;
903 /* DELTA: The in-core extent described by new changed type. */
904 temp = new->br_startoff;
905 temp2 = new->br_blockcount;
906 break; 870 break;
907 871
908 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG: 872 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
@@ -942,10 +906,6 @@ xfs_bmap_add_extent_delay_real(
942 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 906 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
943 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); 907 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
944 *dnew = temp; 908 *dnew = temp;
945 /* DELTA: The boundary between two in-core extents moved. */
946 temp = LEFT.br_startoff;
947 temp2 = LEFT.br_blockcount +
948 PREV.br_blockcount;
949 break; 909 break;
950 910
951 case BMAP_LEFT_FILLING: 911 case BMAP_LEFT_FILLING:
@@ -990,9 +950,6 @@ xfs_bmap_add_extent_delay_real(
990 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 950 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
991 trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_); 951 trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_);
992 *dnew = temp; 952 *dnew = temp;
993 /* DELTA: One in-core extent is split in two. */
994 temp = PREV.br_startoff;
995 temp2 = PREV.br_blockcount;
996 break; 953 break;
997 954
998 case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: 955 case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -1031,10 +988,6 @@ xfs_bmap_add_extent_delay_real(
1031 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 988 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
1032 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); 989 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1033 *dnew = temp; 990 *dnew = temp;
1034 /* DELTA: The boundary between two in-core extents moved. */
1035 temp = PREV.br_startoff;
1036 temp2 = PREV.br_blockcount +
1037 RIGHT.br_blockcount;
1038 break; 991 break;
1039 992
1040 case BMAP_RIGHT_FILLING: 993 case BMAP_RIGHT_FILLING:
@@ -1078,9 +1031,6 @@ xfs_bmap_add_extent_delay_real(
1078 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 1031 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
1079 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); 1032 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1080 *dnew = temp; 1033 *dnew = temp;
1081 /* DELTA: One in-core extent is split in two. */
1082 temp = PREV.br_startoff;
1083 temp2 = PREV.br_blockcount;
1084 break; 1034 break;
1085 1035
1086 case 0: 1036 case 0:
@@ -1161,9 +1111,6 @@ xfs_bmap_add_extent_delay_real(
1161 nullstartblock((int)temp2)); 1111 nullstartblock((int)temp2));
1162 trace_xfs_bmap_post_update(ip, idx + 2, state, _THIS_IP_); 1112 trace_xfs_bmap_post_update(ip, idx + 2, state, _THIS_IP_);
1163 *dnew = temp + temp2; 1113 *dnew = temp + temp2;
1164 /* DELTA: One in-core extent is split in three. */
1165 temp = PREV.br_startoff;
1166 temp2 = PREV.br_blockcount;
1167 break; 1114 break;
1168 1115
1169 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: 1116 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
@@ -1179,13 +1126,6 @@ xfs_bmap_add_extent_delay_real(
1179 ASSERT(0); 1126 ASSERT(0);
1180 } 1127 }
1181 *curp = cur; 1128 *curp = cur;
1182 if (delta) {
1183 temp2 += temp;
1184 if (delta->xed_startoff > temp)
1185 delta->xed_startoff = temp;
1186 if (delta->xed_blockcount < temp2)
1187 delta->xed_blockcount = temp2;
1188 }
1189done: 1129done:
1190 *logflagsp = rval; 1130 *logflagsp = rval;
1191 return error; 1131 return error;
@@ -1204,8 +1144,7 @@ xfs_bmap_add_extent_unwritten_real(
1204 xfs_extnum_t idx, /* extent number to update/insert */ 1144 xfs_extnum_t idx, /* extent number to update/insert */
1205 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ 1145 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
1206 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 1146 xfs_bmbt_irec_t *new, /* new data to add to file extents */
1207 int *logflagsp, /* inode logging flags */ 1147 int *logflagsp) /* inode logging flags */
1208 xfs_extdelta_t *delta) /* Change made to incore extents */
1209{ 1148{
1210 xfs_btree_cur_t *cur; /* btree cursor */ 1149 xfs_btree_cur_t *cur; /* btree cursor */
1211 xfs_bmbt_rec_host_t *ep; /* extent entry for idx */ 1150 xfs_bmbt_rec_host_t *ep; /* extent entry for idx */
@@ -1219,8 +1158,6 @@ xfs_bmap_add_extent_unwritten_real(
1219 /* left is 0, right is 1, prev is 2 */ 1158 /* left is 0, right is 1, prev is 2 */
1220 int rval=0; /* return value (logging flags) */ 1159 int rval=0; /* return value (logging flags) */
1221 int state = 0;/* state bits, accessed thru macros */ 1160 int state = 0;/* state bits, accessed thru macros */
1222 xfs_filblks_t temp=0;
1223 xfs_filblks_t temp2=0;
1224 1161
1225#define LEFT r[0] 1162#define LEFT r[0]
1226#define RIGHT r[1] 1163#define RIGHT r[1]
@@ -1341,11 +1278,6 @@ xfs_bmap_add_extent_unwritten_real(
1341 RIGHT.br_blockcount, LEFT.br_state))) 1278 RIGHT.br_blockcount, LEFT.br_state)))
1342 goto done; 1279 goto done;
1343 } 1280 }
1344 /* DELTA: Three in-core extents are replaced by one. */
1345 temp = LEFT.br_startoff;
1346 temp2 = LEFT.br_blockcount +
1347 PREV.br_blockcount +
1348 RIGHT.br_blockcount;
1349 break; 1281 break;
1350 1282
1351 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: 1283 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
@@ -1382,10 +1314,6 @@ xfs_bmap_add_extent_unwritten_real(
1382 LEFT.br_state))) 1314 LEFT.br_state)))
1383 goto done; 1315 goto done;
1384 } 1316 }
1385 /* DELTA: Two in-core extents are replaced by one. */
1386 temp = LEFT.br_startoff;
1387 temp2 = LEFT.br_blockcount +
1388 PREV.br_blockcount;
1389 break; 1317 break;
1390 1318
1391 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: 1319 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -1422,10 +1350,6 @@ xfs_bmap_add_extent_unwritten_real(
1422 newext))) 1350 newext)))
1423 goto done; 1351 goto done;
1424 } 1352 }
1425 /* DELTA: Two in-core extents are replaced by one. */
1426 temp = PREV.br_startoff;
1427 temp2 = PREV.br_blockcount +
1428 RIGHT.br_blockcount;
1429 break; 1353 break;
1430 1354
1431 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: 1355 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
@@ -1453,9 +1377,6 @@ xfs_bmap_add_extent_unwritten_real(
1453 newext))) 1377 newext)))
1454 goto done; 1378 goto done;
1455 } 1379 }
1456 /* DELTA: The in-core extent described by new changed type. */
1457 temp = new->br_startoff;
1458 temp2 = new->br_blockcount;
1459 break; 1380 break;
1460 1381
1461 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG: 1382 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
@@ -1501,10 +1422,6 @@ xfs_bmap_add_extent_unwritten_real(
1501 LEFT.br_state)) 1422 LEFT.br_state))
1502 goto done; 1423 goto done;
1503 } 1424 }
1504 /* DELTA: The boundary between two in-core extents moved. */
1505 temp = LEFT.br_startoff;
1506 temp2 = LEFT.br_blockcount +
1507 PREV.br_blockcount;
1508 break; 1425 break;
1509 1426
1510 case BMAP_LEFT_FILLING: 1427 case BMAP_LEFT_FILLING:
@@ -1544,9 +1461,6 @@ xfs_bmap_add_extent_unwritten_real(
1544 goto done; 1461 goto done;
1545 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1462 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1546 } 1463 }
1547 /* DELTA: One in-core extent is split in two. */
1548 temp = PREV.br_startoff;
1549 temp2 = PREV.br_blockcount;
1550 break; 1464 break;
1551 1465
1552 case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: 1466 case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -1587,10 +1501,6 @@ xfs_bmap_add_extent_unwritten_real(
1587 newext))) 1501 newext)))
1588 goto done; 1502 goto done;
1589 } 1503 }
1590 /* DELTA: The boundary between two in-core extents moved. */
1591 temp = PREV.br_startoff;
1592 temp2 = PREV.br_blockcount +
1593 RIGHT.br_blockcount;
1594 break; 1504 break;
1595 1505
1596 case BMAP_RIGHT_FILLING: 1506 case BMAP_RIGHT_FILLING:
@@ -1630,9 +1540,6 @@ xfs_bmap_add_extent_unwritten_real(
1630 goto done; 1540 goto done;
1631 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1541 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1632 } 1542 }
1633 /* DELTA: One in-core extent is split in two. */
1634 temp = PREV.br_startoff;
1635 temp2 = PREV.br_blockcount;
1636 break; 1543 break;
1637 1544
1638 case 0: 1545 case 0:
@@ -1692,9 +1599,6 @@ xfs_bmap_add_extent_unwritten_real(
1692 goto done; 1599 goto done;
1693 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1600 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1694 } 1601 }
1695 /* DELTA: One in-core extent is split in three. */
1696 temp = PREV.br_startoff;
1697 temp2 = PREV.br_blockcount;
1698 break; 1602 break;
1699 1603
1700 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: 1604 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
@@ -1710,13 +1614,6 @@ xfs_bmap_add_extent_unwritten_real(
1710 ASSERT(0); 1614 ASSERT(0);
1711 } 1615 }
1712 *curp = cur; 1616 *curp = cur;
1713 if (delta) {
1714 temp2 += temp;
1715 if (delta->xed_startoff > temp)
1716 delta->xed_startoff = temp;
1717 if (delta->xed_blockcount < temp2)
1718 delta->xed_blockcount = temp2;
1719 }
1720done: 1617done:
1721 *logflagsp = rval; 1618 *logflagsp = rval;
1722 return error; 1619 return error;
@@ -1736,7 +1633,6 @@ xfs_bmap_add_extent_hole_delay(
1736 xfs_extnum_t idx, /* extent number to update/insert */ 1633 xfs_extnum_t idx, /* extent number to update/insert */
1737 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 1634 xfs_bmbt_irec_t *new, /* new data to add to file extents */
1738 int *logflagsp, /* inode logging flags */ 1635 int *logflagsp, /* inode logging flags */
1739 xfs_extdelta_t *delta, /* Change made to incore extents */
1740 int rsvd) /* OK to allocate reserved blocks */ 1636 int rsvd) /* OK to allocate reserved blocks */
1741{ 1637{
1742 xfs_bmbt_rec_host_t *ep; /* extent record for idx */ 1638 xfs_bmbt_rec_host_t *ep; /* extent record for idx */
@@ -1747,7 +1643,6 @@ xfs_bmap_add_extent_hole_delay(
1747 xfs_bmbt_irec_t right; /* right neighbor extent entry */ 1643 xfs_bmbt_irec_t right; /* right neighbor extent entry */
1748 int state; /* state bits, accessed thru macros */ 1644 int state; /* state bits, accessed thru macros */
1749 xfs_filblks_t temp=0; /* temp for indirect calculations */ 1645 xfs_filblks_t temp=0; /* temp for indirect calculations */
1750 xfs_filblks_t temp2=0;
1751 1646
1752 ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); 1647 ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1753 ep = xfs_iext_get_ext(ifp, idx); 1648 ep = xfs_iext_get_ext(ifp, idx);
@@ -1819,9 +1714,6 @@ xfs_bmap_add_extent_hole_delay(
1819 1714
1820 xfs_iext_remove(ip, idx, 1, state); 1715 xfs_iext_remove(ip, idx, 1, state);
1821 ip->i_df.if_lastex = idx - 1; 1716 ip->i_df.if_lastex = idx - 1;
1822 /* DELTA: Two in-core extents were replaced by one. */
1823 temp2 = temp;
1824 temp = left.br_startoff;
1825 break; 1717 break;
1826 1718
1827 case BMAP_LEFT_CONTIG: 1719 case BMAP_LEFT_CONTIG:
@@ -1841,9 +1733,6 @@ xfs_bmap_add_extent_hole_delay(
1841 trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); 1733 trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
1842 1734
1843 ip->i_df.if_lastex = idx - 1; 1735 ip->i_df.if_lastex = idx - 1;
1844 /* DELTA: One in-core extent grew into a hole. */
1845 temp2 = temp;
1846 temp = left.br_startoff;
1847 break; 1736 break;
1848 1737
1849 case BMAP_RIGHT_CONTIG: 1738 case BMAP_RIGHT_CONTIG:
@@ -1862,9 +1751,6 @@ xfs_bmap_add_extent_hole_delay(
1862 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); 1751 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1863 1752
1864 ip->i_df.if_lastex = idx; 1753 ip->i_df.if_lastex = idx;
1865 /* DELTA: One in-core extent grew into a hole. */
1866 temp2 = temp;
1867 temp = new->br_startoff;
1868 break; 1754 break;
1869 1755
1870 case 0: 1756 case 0:
@@ -1876,9 +1762,6 @@ xfs_bmap_add_extent_hole_delay(
1876 oldlen = newlen = 0; 1762 oldlen = newlen = 0;
1877 xfs_iext_insert(ip, idx, 1, new, state); 1763 xfs_iext_insert(ip, idx, 1, new, state);
1878 ip->i_df.if_lastex = idx; 1764 ip->i_df.if_lastex = idx;
1879 /* DELTA: A new in-core extent was added in a hole. */
1880 temp2 = new->br_blockcount;
1881 temp = new->br_startoff;
1882 break; 1765 break;
1883 } 1766 }
1884 if (oldlen != newlen) { 1767 if (oldlen != newlen) {
@@ -1889,13 +1772,6 @@ xfs_bmap_add_extent_hole_delay(
1889 * Nothing to do for disk quota accounting here. 1772 * Nothing to do for disk quota accounting here.
1890 */ 1773 */
1891 } 1774 }
1892 if (delta) {
1893 temp2 += temp;
1894 if (delta->xed_startoff > temp)
1895 delta->xed_startoff = temp;
1896 if (delta->xed_blockcount < temp2)
1897 delta->xed_blockcount = temp2;
1898 }
1899 *logflagsp = 0; 1775 *logflagsp = 0;
1900 return 0; 1776 return 0;
1901} 1777}
@@ -1911,7 +1787,6 @@ xfs_bmap_add_extent_hole_real(
1911 xfs_btree_cur_t *cur, /* if null, not a btree */ 1787 xfs_btree_cur_t *cur, /* if null, not a btree */
1912 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 1788 xfs_bmbt_irec_t *new, /* new data to add to file extents */
1913 int *logflagsp, /* inode logging flags */ 1789 int *logflagsp, /* inode logging flags */
1914 xfs_extdelta_t *delta, /* Change made to incore extents */
1915 int whichfork) /* data or attr fork */ 1790 int whichfork) /* data or attr fork */
1916{ 1791{
1917 xfs_bmbt_rec_host_t *ep; /* pointer to extent entry ins. point */ 1792 xfs_bmbt_rec_host_t *ep; /* pointer to extent entry ins. point */
@@ -1922,8 +1797,6 @@ xfs_bmap_add_extent_hole_real(
1922 xfs_bmbt_irec_t right; /* right neighbor extent entry */ 1797 xfs_bmbt_irec_t right; /* right neighbor extent entry */
1923 int rval=0; /* return value (logging flags) */ 1798 int rval=0; /* return value (logging flags) */
1924 int state; /* state bits, accessed thru macros */ 1799 int state; /* state bits, accessed thru macros */
1925 xfs_filblks_t temp=0;
1926 xfs_filblks_t temp2=0;
1927 1800
1928 ifp = XFS_IFORK_PTR(ip, whichfork); 1801 ifp = XFS_IFORK_PTR(ip, whichfork);
1929 ASSERT(idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); 1802 ASSERT(idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t));
@@ -2020,11 +1893,6 @@ xfs_bmap_add_extent_hole_real(
2020 left.br_state))) 1893 left.br_state)))
2021 goto done; 1894 goto done;
2022 } 1895 }
2023 /* DELTA: Two in-core extents were replaced by one. */
2024 temp = left.br_startoff;
2025 temp2 = left.br_blockcount +
2026 new->br_blockcount +
2027 right.br_blockcount;
2028 break; 1896 break;
2029 1897
2030 case BMAP_LEFT_CONTIG: 1898 case BMAP_LEFT_CONTIG:
@@ -2056,10 +1924,6 @@ xfs_bmap_add_extent_hole_real(
2056 left.br_state))) 1924 left.br_state)))
2057 goto done; 1925 goto done;
2058 } 1926 }
2059 /* DELTA: One in-core extent grew. */
2060 temp = left.br_startoff;
2061 temp2 = left.br_blockcount +
2062 new->br_blockcount;
2063 break; 1927 break;
2064 1928
2065 case BMAP_RIGHT_CONTIG: 1929 case BMAP_RIGHT_CONTIG:
@@ -2092,10 +1956,6 @@ xfs_bmap_add_extent_hole_real(
2092 right.br_state))) 1956 right.br_state)))
2093 goto done; 1957 goto done;
2094 } 1958 }
2095 /* DELTA: One in-core extent grew. */
2096 temp = new->br_startoff;
2097 temp2 = new->br_blockcount +
2098 right.br_blockcount;
2099 break; 1959 break;
2100 1960
2101 case 0: 1961 case 0:
@@ -2123,18 +1983,8 @@ xfs_bmap_add_extent_hole_real(
2123 goto done; 1983 goto done;
2124 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1984 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2125 } 1985 }
2126 /* DELTA: A new extent was added in a hole. */
2127 temp = new->br_startoff;
2128 temp2 = new->br_blockcount;
2129 break; 1986 break;
2130 } 1987 }
2131 if (delta) {
2132 temp2 += temp;
2133 if (delta->xed_startoff > temp)
2134 delta->xed_startoff = temp;
2135 if (delta->xed_blockcount < temp2)
2136 delta->xed_blockcount = temp2;
2137 }
2138done: 1988done:
2139 *logflagsp = rval; 1989 *logflagsp = rval;
2140 return error; 1990 return error;
@@ -2959,7 +2809,6 @@ xfs_bmap_del_extent(
2959 xfs_btree_cur_t *cur, /* if null, not a btree */ 2809 xfs_btree_cur_t *cur, /* if null, not a btree */
2960 xfs_bmbt_irec_t *del, /* data to remove from extents */ 2810 xfs_bmbt_irec_t *del, /* data to remove from extents */
2961 int *logflagsp, /* inode logging flags */ 2811 int *logflagsp, /* inode logging flags */
2962 xfs_extdelta_t *delta, /* Change made to incore extents */
2963 int whichfork, /* data or attr fork */ 2812 int whichfork, /* data or attr fork */
2964 int rsvd) /* OK to allocate reserved blocks */ 2813 int rsvd) /* OK to allocate reserved blocks */
2965{ 2814{
@@ -3265,14 +3114,6 @@ xfs_bmap_del_extent(
3265 if (da_old > da_new) 3114 if (da_old > da_new)
3266 xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, (int64_t)(da_old - da_new), 3115 xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, (int64_t)(da_old - da_new),
3267 rsvd); 3116 rsvd);
3268 if (delta) {
3269 /* DELTA: report the original extent. */
3270 if (delta->xed_startoff > got.br_startoff)
3271 delta->xed_startoff = got.br_startoff;
3272 if (delta->xed_blockcount < got.br_startoff+got.br_blockcount)
3273 delta->xed_blockcount = got.br_startoff +
3274 got.br_blockcount;
3275 }
3276done: 3117done:
3277 *logflagsp = flags; 3118 *logflagsp = flags;
3278 return error; 3119 return error;
@@ -3754,9 +3595,10 @@ xfs_bmap_add_attrfork(
3754 ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; 3595 ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
3755 } 3596 }
3756 ASSERT(ip->i_d.di_anextents == 0); 3597 ASSERT(ip->i_d.di_anextents == 0);
3757 IHOLD(ip); 3598
3758 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 3599 xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
3759 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 3600 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
3601
3760 switch (ip->i_d.di_format) { 3602 switch (ip->i_d.di_format) {
3761 case XFS_DINODE_FMT_DEV: 3603 case XFS_DINODE_FMT_DEV:
3762 ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3; 3604 ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
@@ -4483,8 +4325,7 @@ xfs_bmapi(
4483 xfs_extlen_t total, /* total blocks needed */ 4325 xfs_extlen_t total, /* total blocks needed */
4484 xfs_bmbt_irec_t *mval, /* output: map values */ 4326 xfs_bmbt_irec_t *mval, /* output: map values */
4485 int *nmap, /* i/o: mval size/count */ 4327 int *nmap, /* i/o: mval size/count */
4486 xfs_bmap_free_t *flist, /* i/o: list extents to free */ 4328 xfs_bmap_free_t *flist) /* i/o: list extents to free */
4487 xfs_extdelta_t *delta) /* o: change made to incore extents */
4488{ 4329{
4489 xfs_fsblock_t abno; /* allocated block number */ 4330 xfs_fsblock_t abno; /* allocated block number */
4490 xfs_extlen_t alen; /* allocated extent length */ 4331 xfs_extlen_t alen; /* allocated extent length */
@@ -4596,10 +4437,7 @@ xfs_bmapi(
4596 end = bno + len; 4437 end = bno + len;
4597 obno = bno; 4438 obno = bno;
4598 bma.ip = NULL; 4439 bma.ip = NULL;
4599 if (delta) { 4440
4600 delta->xed_startoff = NULLFILEOFF;
4601 delta->xed_blockcount = 0;
4602 }
4603 while (bno < end && n < *nmap) { 4441 while (bno < end && n < *nmap) {
4604 /* 4442 /*
4605 * Reading past eof, act as though there's a hole 4443 * Reading past eof, act as though there's a hole
@@ -4620,19 +4458,13 @@ xfs_bmapi(
4620 * allocate the stuff asked for in this bmap call 4458 * allocate the stuff asked for in this bmap call
4621 * but that wouldn't be as good. 4459 * but that wouldn't be as good.
4622 */ 4460 */
4623 if (wasdelay && !(flags & XFS_BMAPI_EXACT)) { 4461 if (wasdelay) {
4624 alen = (xfs_extlen_t)got.br_blockcount; 4462 alen = (xfs_extlen_t)got.br_blockcount;
4625 aoff = got.br_startoff; 4463 aoff = got.br_startoff;
4626 if (lastx != NULLEXTNUM && lastx) { 4464 if (lastx != NULLEXTNUM && lastx) {
4627 ep = xfs_iext_get_ext(ifp, lastx - 1); 4465 ep = xfs_iext_get_ext(ifp, lastx - 1);
4628 xfs_bmbt_get_all(ep, &prev); 4466 xfs_bmbt_get_all(ep, &prev);
4629 } 4467 }
4630 } else if (wasdelay) {
4631 alen = (xfs_extlen_t)
4632 XFS_FILBLKS_MIN(len,
4633 (got.br_startoff +
4634 got.br_blockcount) - bno);
4635 aoff = bno;
4636 } else { 4468 } else {
4637 alen = (xfs_extlen_t) 4469 alen = (xfs_extlen_t)
4638 XFS_FILBLKS_MIN(len, MAXEXTLEN); 4470 XFS_FILBLKS_MIN(len, MAXEXTLEN);
@@ -4831,7 +4663,7 @@ xfs_bmapi(
4831 got.br_state = XFS_EXT_UNWRITTEN; 4663 got.br_state = XFS_EXT_UNWRITTEN;
4832 } 4664 }
4833 error = xfs_bmap_add_extent(ip, lastx, &cur, &got, 4665 error = xfs_bmap_add_extent(ip, lastx, &cur, &got,
4834 firstblock, flist, &tmp_logflags, delta, 4666 firstblock, flist, &tmp_logflags,
4835 whichfork, (flags & XFS_BMAPI_RSVBLOCKS)); 4667 whichfork, (flags & XFS_BMAPI_RSVBLOCKS));
4836 logflags |= tmp_logflags; 4668 logflags |= tmp_logflags;
4837 if (error) 4669 if (error)
@@ -4927,7 +4759,7 @@ xfs_bmapi(
4927 } 4759 }
4928 mval->br_state = XFS_EXT_NORM; 4760 mval->br_state = XFS_EXT_NORM;
4929 error = xfs_bmap_add_extent(ip, lastx, &cur, mval, 4761 error = xfs_bmap_add_extent(ip, lastx, &cur, mval,
4930 firstblock, flist, &tmp_logflags, delta, 4762 firstblock, flist, &tmp_logflags,
4931 whichfork, (flags & XFS_BMAPI_RSVBLOCKS)); 4763 whichfork, (flags & XFS_BMAPI_RSVBLOCKS));
4932 logflags |= tmp_logflags; 4764 logflags |= tmp_logflags;
4933 if (error) 4765 if (error)
@@ -5017,14 +4849,6 @@ xfs_bmapi(
5017 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE || 4849 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
5018 XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max); 4850 XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max);
5019 error = 0; 4851 error = 0;
5020 if (delta && delta->xed_startoff != NULLFILEOFF) {
5021 /* A change was actually made.
5022 * Note that delta->xed_blockount is an offset at this
5023 * point and needs to be converted to a block count.
5024 */
5025 ASSERT(delta->xed_blockcount > delta->xed_startoff);
5026 delta->xed_blockcount -= delta->xed_startoff;
5027 }
5028error0: 4852error0:
5029 /* 4853 /*
5030 * Log everything. Do this after conversion, there's no point in 4854 * Log everything. Do this after conversion, there's no point in
@@ -5136,8 +4960,6 @@ xfs_bunmapi(
5136 xfs_fsblock_t *firstblock, /* first allocated block 4960 xfs_fsblock_t *firstblock, /* first allocated block
5137 controls a.g. for allocs */ 4961 controls a.g. for allocs */
5138 xfs_bmap_free_t *flist, /* i/o: list extents to free */ 4962 xfs_bmap_free_t *flist, /* i/o: list extents to free */
5139 xfs_extdelta_t *delta, /* o: change made to incore
5140 extents */
5141 int *done) /* set if not done yet */ 4963 int *done) /* set if not done yet */
5142{ 4964{
5143 xfs_btree_cur_t *cur; /* bmap btree cursor */ 4965 xfs_btree_cur_t *cur; /* bmap btree cursor */
@@ -5196,10 +5018,7 @@ xfs_bunmapi(
5196 bno = start + len - 1; 5018 bno = start + len - 1;
5197 ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, 5019 ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
5198 &prev); 5020 &prev);
5199 if (delta) { 5021
5200 delta->xed_startoff = NULLFILEOFF;
5201 delta->xed_blockcount = 0;
5202 }
5203 /* 5022 /*
5204 * Check to see if the given block number is past the end of the 5023 * Check to see if the given block number is past the end of the
5205 * file, back up to the last block if so... 5024 * file, back up to the last block if so...
@@ -5297,7 +5116,7 @@ xfs_bunmapi(
5297 } 5116 }
5298 del.br_state = XFS_EXT_UNWRITTEN; 5117 del.br_state = XFS_EXT_UNWRITTEN;
5299 error = xfs_bmap_add_extent(ip, lastx, &cur, &del, 5118 error = xfs_bmap_add_extent(ip, lastx, &cur, &del,
5300 firstblock, flist, &logflags, delta, 5119 firstblock, flist, &logflags,
5301 XFS_DATA_FORK, 0); 5120 XFS_DATA_FORK, 0);
5302 if (error) 5121 if (error)
5303 goto error0; 5122 goto error0;
@@ -5352,7 +5171,7 @@ xfs_bunmapi(
5352 prev.br_state = XFS_EXT_UNWRITTEN; 5171 prev.br_state = XFS_EXT_UNWRITTEN;
5353 error = xfs_bmap_add_extent(ip, lastx - 1, &cur, 5172 error = xfs_bmap_add_extent(ip, lastx - 1, &cur,
5354 &prev, firstblock, flist, &logflags, 5173 &prev, firstblock, flist, &logflags,
5355 delta, XFS_DATA_FORK, 0); 5174 XFS_DATA_FORK, 0);
5356 if (error) 5175 if (error)
5357 goto error0; 5176 goto error0;
5358 goto nodelete; 5177 goto nodelete;
@@ -5361,7 +5180,7 @@ xfs_bunmapi(
5361 del.br_state = XFS_EXT_UNWRITTEN; 5180 del.br_state = XFS_EXT_UNWRITTEN;
5362 error = xfs_bmap_add_extent(ip, lastx, &cur, 5181 error = xfs_bmap_add_extent(ip, lastx, &cur,
5363 &del, firstblock, flist, &logflags, 5182 &del, firstblock, flist, &logflags,
5364 delta, XFS_DATA_FORK, 0); 5183 XFS_DATA_FORK, 0);
5365 if (error) 5184 if (error)
5366 goto error0; 5185 goto error0;
5367 goto nodelete; 5186 goto nodelete;
@@ -5414,7 +5233,7 @@ xfs_bunmapi(
5414 goto error0; 5233 goto error0;
5415 } 5234 }
5416 error = xfs_bmap_del_extent(ip, tp, lastx, flist, cur, &del, 5235 error = xfs_bmap_del_extent(ip, tp, lastx, flist, cur, &del,
5417 &tmp_logflags, delta, whichfork, rsvd); 5236 &tmp_logflags, whichfork, rsvd);
5418 logflags |= tmp_logflags; 5237 logflags |= tmp_logflags;
5419 if (error) 5238 if (error)
5420 goto error0; 5239 goto error0;
@@ -5471,14 +5290,6 @@ nodelete:
5471 ASSERT(ifp->if_ext_max == 5290 ASSERT(ifp->if_ext_max ==
5472 XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); 5291 XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
5473 error = 0; 5292 error = 0;
5474 if (delta && delta->xed_startoff != NULLFILEOFF) {
5475 /* A change was actually made.
5476 * Note that delta->xed_blockount is an offset at this
5477 * point and needs to be converted to a block count.
5478 */
5479 ASSERT(delta->xed_blockcount > delta->xed_startoff);
5480 delta->xed_blockcount -= delta->xed_startoff;
5481 }
5482error0: 5293error0:
5483 /* 5294 /*
5484 * Log everything. Do this after conversion, there's no point in 5295 * Log everything. Do this after conversion, there's no point in
@@ -5605,28 +5416,6 @@ xfs_getbmap(
5605 prealloced = 0; 5416 prealloced = 0;
5606 fixlen = 1LL << 32; 5417 fixlen = 1LL << 32;
5607 } else { 5418 } else {
5608 /*
5609 * If the BMV_IF_NO_DMAPI_READ interface bit specified, do
5610 * not generate a DMAPI read event. Otherwise, if the
5611 * DM_EVENT_READ bit is set for the file, generate a read
5612 * event in order that the DMAPI application may do its thing
5613 * before we return the extents. Usually this means restoring
5614 * user file data to regions of the file that look like holes.
5615 *
5616 * The "old behavior" (from XFS_IOC_GETBMAP) is to not specify
5617 * BMV_IF_NO_DMAPI_READ so that read events are generated.
5618 * If this were not true, callers of ioctl(XFS_IOC_GETBMAP)
5619 * could misinterpret holes in a DMAPI file as true holes,
5620 * when in fact they may represent offline user data.
5621 */
5622 if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) &&
5623 !(iflags & BMV_IF_NO_DMAPI_READ)) {
5624 error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip,
5625 0, 0, 0, NULL);
5626 if (error)
5627 return XFS_ERROR(error);
5628 }
5629
5630 if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && 5419 if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
5631 ip->i_d.di_format != XFS_DINODE_FMT_BTREE && 5420 ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
5632 ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) 5421 ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
@@ -5713,7 +5502,7 @@ xfs_getbmap(
5713 error = xfs_bmapi(NULL, ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), 5502 error = xfs_bmapi(NULL, ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
5714 XFS_BB_TO_FSB(mp, bmv->bmv_length), 5503 XFS_BB_TO_FSB(mp, bmv->bmv_length),
5715 bmapi_flags, NULL, 0, map, &nmap, 5504 bmapi_flags, NULL, 0, map, &nmap,
5716 NULL, NULL); 5505 NULL);
5717 if (error) 5506 if (error)
5718 goto out_free_map; 5507 goto out_free_map;
5719 ASSERT(nmap <= subnex); 5508 ASSERT(nmap <= subnex);
@@ -5859,66 +5648,34 @@ xfs_bmap_eof(
5859} 5648}
5860 5649
5861#ifdef DEBUG 5650#ifdef DEBUG
5862STATIC 5651STATIC struct xfs_buf *
5863xfs_buf_t *
5864xfs_bmap_get_bp( 5652xfs_bmap_get_bp(
5865 xfs_btree_cur_t *cur, 5653 struct xfs_btree_cur *cur,
5866 xfs_fsblock_t bno) 5654 xfs_fsblock_t bno)
5867{ 5655{
5868 int i; 5656 struct xfs_log_item_desc *lidp;
5869 xfs_buf_t *bp; 5657 int i;
5870 5658
5871 if (!cur) 5659 if (!cur)
5872 return(NULL); 5660 return NULL;
5873
5874 bp = NULL;
5875 for(i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
5876 bp = cur->bc_bufs[i];
5877 if (!bp) break;
5878 if (XFS_BUF_ADDR(bp) == bno)
5879 break; /* Found it */
5880 }
5881 if (i == XFS_BTREE_MAXLEVELS)
5882 bp = NULL;
5883
5884 if (!bp) { /* Chase down all the log items to see if the bp is there */
5885 xfs_log_item_chunk_t *licp;
5886 xfs_trans_t *tp;
5887
5888 tp = cur->bc_tp;
5889 licp = &tp->t_items;
5890 while (!bp && licp != NULL) {
5891 if (xfs_lic_are_all_free(licp)) {
5892 licp = licp->lic_next;
5893 continue;
5894 }
5895 for (i = 0; i < licp->lic_unused; i++) {
5896 xfs_log_item_desc_t *lidp;
5897 xfs_log_item_t *lip;
5898 xfs_buf_log_item_t *bip;
5899 xfs_buf_t *lbp;
5900
5901 if (xfs_lic_isfree(licp, i)) {
5902 continue;
5903 }
5904
5905 lidp = xfs_lic_slot(licp, i);
5906 lip = lidp->lid_item;
5907 if (lip->li_type != XFS_LI_BUF)
5908 continue;
5909 5661
5910 bip = (xfs_buf_log_item_t *)lip; 5662 for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
5911 lbp = bip->bli_buf; 5663 if (!cur->bc_bufs[i])
5664 break;
5665 if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
5666 return cur->bc_bufs[i];
5667 }
5912 5668
5913 if (XFS_BUF_ADDR(lbp) == bno) { 5669 /* Chase down all the log items to see if the bp is there */
5914 bp = lbp; 5670 list_for_each_entry(lidp, &cur->bc_tp->t_items, lid_trans) {
5915 break; /* Found it */ 5671 struct xfs_buf_log_item *bip;
5916 } 5672 bip = (struct xfs_buf_log_item *)lidp->lid_item;
5917 } 5673 if (bip->bli_item.li_type == XFS_LI_BUF &&
5918 licp = licp->lic_next; 5674 XFS_BUF_ADDR(bip->bli_buf) == bno)
5919 } 5675 return bip->bli_buf;
5920 } 5676 }
5921 return(bp); 5677
5678 return NULL;
5922} 5679}
5923 5680
5924STATIC void 5681STATIC void
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 419dafb9d87d..b13569a6179b 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -28,20 +28,6 @@ struct xfs_trans;
28extern kmem_zone_t *xfs_bmap_free_item_zone; 28extern kmem_zone_t *xfs_bmap_free_item_zone;
29 29
30/* 30/*
31 * DELTA: describe a change to the in-core extent list.
32 *
33 * Internally the use of xed_blockcount is somewhat funky.
34 * xed_blockcount contains an offset much of the time because this
35 * makes merging changes easier. (xfs_fileoff_t and xfs_filblks_t are
36 * the same underlying type).
37 */
38typedef struct xfs_extdelta
39{
40 xfs_fileoff_t xed_startoff; /* offset of range */
41 xfs_filblks_t xed_blockcount; /* blocks in range */
42} xfs_extdelta_t;
43
44/*
45 * List of extents to be freed "later". 31 * List of extents to be freed "later".
46 * The list is kept sorted on xbf_startblock. 32 * The list is kept sorted on xbf_startblock.
47 */ 33 */
@@ -82,16 +68,13 @@ typedef struct xfs_bmap_free
82#define XFS_BMAPI_DELAY 0x002 /* delayed write operation */ 68#define XFS_BMAPI_DELAY 0x002 /* delayed write operation */
83#define XFS_BMAPI_ENTIRE 0x004 /* return entire extent, not trimmed */ 69#define XFS_BMAPI_ENTIRE 0x004 /* return entire extent, not trimmed */
84#define XFS_BMAPI_METADATA 0x008 /* mapping metadata not user data */ 70#define XFS_BMAPI_METADATA 0x008 /* mapping metadata not user data */
85#define XFS_BMAPI_EXACT 0x010 /* allocate only to spec'd bounds */ 71#define XFS_BMAPI_ATTRFORK 0x010 /* use attribute fork not data */
86#define XFS_BMAPI_ATTRFORK 0x020 /* use attribute fork not data */ 72#define XFS_BMAPI_RSVBLOCKS 0x020 /* OK to alloc. reserved data blocks */
87#define XFS_BMAPI_ASYNC 0x040 /* bunmapi xactions can be async */ 73#define XFS_BMAPI_PREALLOC 0x040 /* preallocation op: unwritten space */
88#define XFS_BMAPI_RSVBLOCKS 0x080 /* OK to alloc. reserved data blocks */ 74#define XFS_BMAPI_IGSTATE 0x080 /* Ignore state - */
89#define XFS_BMAPI_PREALLOC 0x100 /* preallocation op: unwritten space */
90#define XFS_BMAPI_IGSTATE 0x200 /* Ignore state - */
91 /* combine contig. space */ 75 /* combine contig. space */
92#define XFS_BMAPI_CONTIG 0x400 /* must allocate only one extent */ 76#define XFS_BMAPI_CONTIG 0x100 /* must allocate only one extent */
93/* XFS_BMAPI_DIRECT_IO 0x800 */ 77#define XFS_BMAPI_CONVERT 0x200 /* unwritten extent conversion - */
94#define XFS_BMAPI_CONVERT 0x1000 /* unwritten extent conversion - */
95 /* need write cache flushing and no */ 78 /* need write cache flushing and no */
96 /* additional allocation alignments */ 79 /* additional allocation alignments */
97 80
@@ -100,9 +83,7 @@ typedef struct xfs_bmap_free
100 { XFS_BMAPI_DELAY, "DELAY" }, \ 83 { XFS_BMAPI_DELAY, "DELAY" }, \
101 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ 84 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \
102 { XFS_BMAPI_METADATA, "METADATA" }, \ 85 { XFS_BMAPI_METADATA, "METADATA" }, \
103 { XFS_BMAPI_EXACT, "EXACT" }, \
104 { XFS_BMAPI_ATTRFORK, "ATTRFORK" }, \ 86 { XFS_BMAPI_ATTRFORK, "ATTRFORK" }, \
105 { XFS_BMAPI_ASYNC, "ASYNC" }, \
106 { XFS_BMAPI_RSVBLOCKS, "RSVBLOCKS" }, \ 87 { XFS_BMAPI_RSVBLOCKS, "RSVBLOCKS" }, \
107 { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ 88 { XFS_BMAPI_PREALLOC, "PREALLOC" }, \
108 { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ 89 { XFS_BMAPI_IGSTATE, "IGSTATE" }, \
@@ -310,9 +291,7 @@ xfs_bmapi(
310 xfs_extlen_t total, /* total blocks needed */ 291 xfs_extlen_t total, /* total blocks needed */
311 struct xfs_bmbt_irec *mval, /* output: map values */ 292 struct xfs_bmbt_irec *mval, /* output: map values */
312 int *nmap, /* i/o: mval size/count */ 293 int *nmap, /* i/o: mval size/count */
313 xfs_bmap_free_t *flist, /* i/o: list extents to free */ 294 xfs_bmap_free_t *flist); /* i/o: list extents to free */
314 xfs_extdelta_t *delta); /* o: change made to incore
315 extents */
316 295
317/* 296/*
318 * Map file blocks to filesystem blocks, simple version. 297 * Map file blocks to filesystem blocks, simple version.
@@ -346,8 +325,6 @@ xfs_bunmapi(
346 xfs_fsblock_t *firstblock, /* first allocated block 325 xfs_fsblock_t *firstblock, /* first allocated block
347 controls a.g. for allocs */ 326 controls a.g. for allocs */
348 xfs_bmap_free_t *flist, /* i/o: list extents to free */ 327 xfs_bmap_free_t *flist, /* i/o: list extents to free */
349 xfs_extdelta_t *delta, /* o: change made to incore
350 extents */
351 int *done); /* set if not done yet */ 328 int *done); /* set if not done yet */
352 329
353/* 330/*
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 416e47e54b83..87d3c10b6954 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -24,21 +24,16 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_inode_item.h" 33#include "xfs_inode_item.h"
38#include "xfs_alloc.h" 34#include "xfs_alloc.h"
39#include "xfs_btree.h" 35#include "xfs_btree.h"
40#include "xfs_btree_trace.h" 36#include "xfs_btree_trace.h"
41#include "xfs_ialloc.h"
42#include "xfs_itable.h" 37#include "xfs_itable.h"
43#include "xfs_bmap.h" 38#include "xfs_bmap.h"
44#include "xfs_error.h" 39#include "xfs_error.h"
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 96be4b0f2496..829af92f0fba 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -24,20 +24,15 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_inode_item.h" 33#include "xfs_inode_item.h"
38#include "xfs_btree.h" 34#include "xfs_btree.h"
39#include "xfs_btree_trace.h" 35#include "xfs_btree_trace.h"
40#include "xfs_ialloc.h"
41#include "xfs_error.h" 36#include "xfs_error.h"
42#include "xfs_trace.h" 37#include "xfs_trace.h"
43 38
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 02a80984aa05..1b09d7a280df 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -24,7 +24,6 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_buf_item.h" 28#include "xfs_buf_item.h"
30#include "xfs_trans_priv.h" 29#include "xfs_trans_priv.h"
@@ -34,6 +33,12 @@
34 33
35kmem_zone_t *xfs_buf_item_zone; 34kmem_zone_t *xfs_buf_item_zone;
36 35
36static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip)
37{
38 return container_of(lip, struct xfs_buf_log_item, bli_item);
39}
40
41
37#ifdef XFS_TRANS_DEBUG 42#ifdef XFS_TRANS_DEBUG
38/* 43/*
39 * This function uses an alternate strategy for tracking the bytes 44 * This function uses an alternate strategy for tracking the bytes
@@ -151,12 +156,13 @@ STATIC void xfs_buf_do_callbacks(xfs_buf_t *bp, xfs_log_item_t *lip);
151 */ 156 */
152STATIC uint 157STATIC uint
153xfs_buf_item_size( 158xfs_buf_item_size(
154 xfs_buf_log_item_t *bip) 159 struct xfs_log_item *lip)
155{ 160{
156 uint nvecs; 161 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
157 int next_bit; 162 struct xfs_buf *bp = bip->bli_buf;
158 int last_bit; 163 uint nvecs;
159 xfs_buf_t *bp; 164 int next_bit;
165 int last_bit;
160 166
161 ASSERT(atomic_read(&bip->bli_refcount) > 0); 167 ASSERT(atomic_read(&bip->bli_refcount) > 0);
162 if (bip->bli_flags & XFS_BLI_STALE) { 168 if (bip->bli_flags & XFS_BLI_STALE) {
@@ -170,7 +176,6 @@ xfs_buf_item_size(
170 return 1; 176 return 1;
171 } 177 }
172 178
173 bp = bip->bli_buf;
174 ASSERT(bip->bli_flags & XFS_BLI_LOGGED); 179 ASSERT(bip->bli_flags & XFS_BLI_LOGGED);
175 nvecs = 1; 180 nvecs = 1;
176 last_bit = xfs_next_bit(bip->bli_format.blf_data_map, 181 last_bit = xfs_next_bit(bip->bli_format.blf_data_map,
@@ -219,13 +224,13 @@ xfs_buf_item_size(
219 */ 224 */
220STATIC void 225STATIC void
221xfs_buf_item_format( 226xfs_buf_item_format(
222 xfs_buf_log_item_t *bip, 227 struct xfs_log_item *lip,
223 xfs_log_iovec_t *log_vector) 228 struct xfs_log_iovec *vecp)
224{ 229{
230 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
231 struct xfs_buf *bp = bip->bli_buf;
225 uint base_size; 232 uint base_size;
226 uint nvecs; 233 uint nvecs;
227 xfs_log_iovec_t *vecp;
228 xfs_buf_t *bp;
229 int first_bit; 234 int first_bit;
230 int last_bit; 235 int last_bit;
231 int next_bit; 236 int next_bit;
@@ -235,8 +240,6 @@ xfs_buf_item_format(
235 ASSERT(atomic_read(&bip->bli_refcount) > 0); 240 ASSERT(atomic_read(&bip->bli_refcount) > 0);
236 ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || 241 ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
237 (bip->bli_flags & XFS_BLI_STALE)); 242 (bip->bli_flags & XFS_BLI_STALE));
238 bp = bip->bli_buf;
239 vecp = log_vector;
240 243
241 /* 244 /*
242 * The size of the base structure is the size of the 245 * The size of the base structure is the size of the
@@ -248,7 +251,7 @@ xfs_buf_item_format(
248 base_size = 251 base_size =
249 (uint)(sizeof(xfs_buf_log_format_t) + 252 (uint)(sizeof(xfs_buf_log_format_t) +
250 ((bip->bli_format.blf_map_size - 1) * sizeof(uint))); 253 ((bip->bli_format.blf_map_size - 1) * sizeof(uint)));
251 vecp->i_addr = (xfs_caddr_t)&bip->bli_format; 254 vecp->i_addr = &bip->bli_format;
252 vecp->i_len = base_size; 255 vecp->i_len = base_size;
253 vecp->i_type = XLOG_REG_TYPE_BFORMAT; 256 vecp->i_type = XLOG_REG_TYPE_BFORMAT;
254 vecp++; 257 vecp++;
@@ -263,7 +266,7 @@ xfs_buf_item_format(
263 */ 266 */
264 if (bip->bli_flags & XFS_BLI_INODE_BUF) { 267 if (bip->bli_flags & XFS_BLI_INODE_BUF) {
265 if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && 268 if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
266 xfs_log_item_in_current_chkpt(&bip->bli_item))) 269 xfs_log_item_in_current_chkpt(lip)))
267 bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF; 270 bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF;
268 bip->bli_flags &= ~XFS_BLI_INODE_BUF; 271 bip->bli_flags &= ~XFS_BLI_INODE_BUF;
269 } 272 }
@@ -356,66 +359,90 @@ xfs_buf_item_format(
356 359
357/* 360/*
358 * This is called to pin the buffer associated with the buf log item in memory 361 * This is called to pin the buffer associated with the buf log item in memory
359 * so it cannot be written out. Simply call bpin() on the buffer to do this. 362 * so it cannot be written out.
360 * 363 *
361 * We also always take a reference to the buffer log item here so that the bli 364 * We also always take a reference to the buffer log item here so that the bli
362 * is held while the item is pinned in memory. This means that we can 365 * is held while the item is pinned in memory. This means that we can
363 * unconditionally drop the reference count a transaction holds when the 366 * unconditionally drop the reference count a transaction holds when the
364 * transaction is completed. 367 * transaction is completed.
365 */ 368 */
366
367STATIC void 369STATIC void
368xfs_buf_item_pin( 370xfs_buf_item_pin(
369 xfs_buf_log_item_t *bip) 371 struct xfs_log_item *lip)
370{ 372{
371 xfs_buf_t *bp; 373 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
372 374
373 bp = bip->bli_buf; 375 ASSERT(XFS_BUF_ISBUSY(bip->bli_buf));
374 ASSERT(XFS_BUF_ISBUSY(bp));
375 ASSERT(atomic_read(&bip->bli_refcount) > 0); 376 ASSERT(atomic_read(&bip->bli_refcount) > 0);
376 ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || 377 ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
377 (bip->bli_flags & XFS_BLI_STALE)); 378 (bip->bli_flags & XFS_BLI_STALE));
378 atomic_inc(&bip->bli_refcount); 379
379 trace_xfs_buf_item_pin(bip); 380 trace_xfs_buf_item_pin(bip);
380 xfs_bpin(bp);
381}
382 381
382 atomic_inc(&bip->bli_refcount);
383 atomic_inc(&bip->bli_buf->b_pin_count);
384}
383 385
384/* 386/*
385 * This is called to unpin the buffer associated with the buf log 387 * This is called to unpin the buffer associated with the buf log
386 * item which was previously pinned with a call to xfs_buf_item_pin(). 388 * item which was previously pinned with a call to xfs_buf_item_pin().
387 * Just call bunpin() on the buffer to do this.
388 * 389 *
389 * Also drop the reference to the buf item for the current transaction. 390 * Also drop the reference to the buf item for the current transaction.
390 * If the XFS_BLI_STALE flag is set and we are the last reference, 391 * If the XFS_BLI_STALE flag is set and we are the last reference,
391 * then free up the buf log item and unlock the buffer. 392 * then free up the buf log item and unlock the buffer.
393 *
394 * If the remove flag is set we are called from uncommit in the
395 * forced-shutdown path. If that is true and the reference count on
396 * the log item is going to drop to zero we need to free the item's
397 * descriptor in the transaction.
392 */ 398 */
393STATIC void 399STATIC void
394xfs_buf_item_unpin( 400xfs_buf_item_unpin(
395 xfs_buf_log_item_t *bip) 401 struct xfs_log_item *lip,
402 int remove)
396{ 403{
397 struct xfs_ail *ailp; 404 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
398 xfs_buf_t *bp; 405 xfs_buf_t *bp = bip->bli_buf;
399 int freed; 406 struct xfs_ail *ailp = lip->li_ailp;
400 int stale = bip->bli_flags & XFS_BLI_STALE; 407 int stale = bip->bli_flags & XFS_BLI_STALE;
408 int freed;
401 409
402 bp = bip->bli_buf;
403 ASSERT(bp != NULL);
404 ASSERT(XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *) == bip); 410 ASSERT(XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *) == bip);
405 ASSERT(atomic_read(&bip->bli_refcount) > 0); 411 ASSERT(atomic_read(&bip->bli_refcount) > 0);
412
406 trace_xfs_buf_item_unpin(bip); 413 trace_xfs_buf_item_unpin(bip);
407 414
408 freed = atomic_dec_and_test(&bip->bli_refcount); 415 freed = atomic_dec_and_test(&bip->bli_refcount);
409 ailp = bip->bli_item.li_ailp; 416
410 xfs_bunpin(bp); 417 if (atomic_dec_and_test(&bp->b_pin_count))
418 wake_up_all(&bp->b_waiters);
419
411 if (freed && stale) { 420 if (freed && stale) {
412 ASSERT(bip->bli_flags & XFS_BLI_STALE); 421 ASSERT(bip->bli_flags & XFS_BLI_STALE);
413 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 422 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
414 ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); 423 ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
415 ASSERT(XFS_BUF_ISSTALE(bp)); 424 ASSERT(XFS_BUF_ISSTALE(bp));
416 ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); 425 ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
426
417 trace_xfs_buf_item_unpin_stale(bip); 427 trace_xfs_buf_item_unpin_stale(bip);
418 428
429 if (remove) {
430 /*
431 * We have to remove the log item from the transaction
432 * as we are about to release our reference to the
433 * buffer. If we don't, the unlock that occurs later
434 * in xfs_trans_uncommit() will try to reference the
435 * buffer which we no longer have a hold on.
436 */
437 xfs_trans_del_item(lip);
438
439 /*
440 * Since the transaction no longer refers to the buffer,
441 * the buffer should no longer refer to the transaction.
442 */
443 XFS_BUF_SET_FSPRIVATE2(bp, NULL);
444 }
445
419 /* 446 /*
420 * If we get called here because of an IO error, we may 447 * If we get called here because of an IO error, we may
421 * or may not have the item on the AIL. xfs_trans_ail_delete() 448 * or may not have the item on the AIL. xfs_trans_ail_delete()
@@ -437,48 +464,6 @@ xfs_buf_item_unpin(
437} 464}
438 465
439/* 466/*
440 * this is called from uncommit in the forced-shutdown path.
441 * we need to check to see if the reference count on the log item
442 * is going to drop to zero. If so, unpin will free the log item
443 * so we need to free the item's descriptor (that points to the item)
444 * in the transaction.
445 */
446STATIC void
447xfs_buf_item_unpin_remove(
448 xfs_buf_log_item_t *bip,
449 xfs_trans_t *tp)
450{
451 /* will xfs_buf_item_unpin() call xfs_buf_item_relse()? */
452 if ((atomic_read(&bip->bli_refcount) == 1) &&
453 (bip->bli_flags & XFS_BLI_STALE)) {
454 /*
455 * yes -- We can safely do some work here and then call
456 * buf_item_unpin to do the rest because we are
457 * are holding the buffer locked so no one else will be
458 * able to bump up the refcount. We have to remove the
459 * log item from the transaction as we are about to release
460 * our reference to the buffer. If we don't, the unlock that
461 * occurs later in the xfs_trans_uncommit() will try to
462 * reference the buffer which we no longer have a hold on.
463 */
464 struct xfs_log_item_desc *lidp;
465
466 ASSERT(XFS_BUF_VALUSEMA(bip->bli_buf) <= 0);
467 trace_xfs_buf_item_unpin_stale(bip);
468
469 lidp = xfs_trans_find_item(tp, (xfs_log_item_t *)bip);
470 xfs_trans_free_item(tp, lidp);
471
472 /*
473 * Since the transaction no longer refers to the buffer, the
474 * buffer should no longer refer to the transaction.
475 */
476 XFS_BUF_SET_FSPRIVATE2(bip->bli_buf, NULL);
477 }
478 xfs_buf_item_unpin(bip);
479}
480
481/*
482 * This is called to attempt to lock the buffer associated with this 467 * This is called to attempt to lock the buffer associated with this
483 * buf log item. Don't sleep on the buffer lock. If we can't get 468 * buf log item. Don't sleep on the buffer lock. If we can't get
484 * the lock right away, return 0. If we can get the lock, take a 469 * the lock right away, return 0. If we can get the lock, take a
@@ -488,11 +473,11 @@ xfs_buf_item_unpin_remove(
488 */ 473 */
489STATIC uint 474STATIC uint
490xfs_buf_item_trylock( 475xfs_buf_item_trylock(
491 xfs_buf_log_item_t *bip) 476 struct xfs_log_item *lip)
492{ 477{
493 xfs_buf_t *bp; 478 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
479 struct xfs_buf *bp = bip->bli_buf;
494 480
495 bp = bip->bli_buf;
496 if (XFS_BUF_ISPINNED(bp)) 481 if (XFS_BUF_ISPINNED(bp))
497 return XFS_ITEM_PINNED; 482 return XFS_ITEM_PINNED;
498 if (!XFS_BUF_CPSEMA(bp)) 483 if (!XFS_BUF_CPSEMA(bp))
@@ -529,13 +514,12 @@ xfs_buf_item_trylock(
529 */ 514 */
530STATIC void 515STATIC void
531xfs_buf_item_unlock( 516xfs_buf_item_unlock(
532 xfs_buf_log_item_t *bip) 517 struct xfs_log_item *lip)
533{ 518{
534 int aborted; 519 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
535 xfs_buf_t *bp; 520 struct xfs_buf *bp = bip->bli_buf;
536 uint hold; 521 int aborted;
537 522 uint hold;
538 bp = bip->bli_buf;
539 523
540 /* Clear the buffer's association with this transaction. */ 524 /* Clear the buffer's association with this transaction. */
541 XFS_BUF_SET_FSPRIVATE2(bp, NULL); 525 XFS_BUF_SET_FSPRIVATE2(bp, NULL);
@@ -546,7 +530,7 @@ xfs_buf_item_unlock(
546 * (cancelled) buffers at unpin time, but we'll never go through the 530 * (cancelled) buffers at unpin time, but we'll never go through the
547 * pin/unpin cycle if we abort inside commit. 531 * pin/unpin cycle if we abort inside commit.
548 */ 532 */
549 aborted = (bip->bli_item.li_flags & XFS_LI_ABORTED) != 0; 533 aborted = (lip->li_flags & XFS_LI_ABORTED) != 0;
550 534
551 /* 535 /*
552 * Before possibly freeing the buf item, determine if we should 536 * Before possibly freeing the buf item, determine if we should
@@ -607,16 +591,16 @@ xfs_buf_item_unlock(
607 */ 591 */
608STATIC xfs_lsn_t 592STATIC xfs_lsn_t
609xfs_buf_item_committed( 593xfs_buf_item_committed(
610 xfs_buf_log_item_t *bip, 594 struct xfs_log_item *lip,
611 xfs_lsn_t lsn) 595 xfs_lsn_t lsn)
612{ 596{
597 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
598
613 trace_xfs_buf_item_committed(bip); 599 trace_xfs_buf_item_committed(bip);
614 600
615 if ((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && 601 if ((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && lip->li_lsn != 0)
616 (bip->bli_item.li_lsn != 0)) { 602 return lip->li_lsn;
617 return bip->bli_item.li_lsn; 603 return lsn;
618 }
619 return (lsn);
620} 604}
621 605
622/* 606/*
@@ -626,15 +610,16 @@ xfs_buf_item_committed(
626 */ 610 */
627STATIC void 611STATIC void
628xfs_buf_item_push( 612xfs_buf_item_push(
629 xfs_buf_log_item_t *bip) 613 struct xfs_log_item *lip)
630{ 614{
631 xfs_buf_t *bp; 615 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
616 struct xfs_buf *bp = bip->bli_buf;
632 617
633 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 618 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
619 ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
620
634 trace_xfs_buf_item_push(bip); 621 trace_xfs_buf_item_push(bip);
635 622
636 bp = bip->bli_buf;
637 ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
638 xfs_buf_relse(bp); 623 xfs_buf_relse(bp);
639} 624}
640 625
@@ -646,22 +631,24 @@ xfs_buf_item_push(
646 */ 631 */
647STATIC void 632STATIC void
648xfs_buf_item_pushbuf( 633xfs_buf_item_pushbuf(
649 xfs_buf_log_item_t *bip) 634 struct xfs_log_item *lip)
650{ 635{
651 xfs_buf_t *bp; 636 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
637 struct xfs_buf *bp = bip->bli_buf;
652 638
653 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 639 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
640 ASSERT(XFS_BUF_ISDELAYWRITE(bp));
641
654 trace_xfs_buf_item_pushbuf(bip); 642 trace_xfs_buf_item_pushbuf(bip);
655 643
656 bp = bip->bli_buf;
657 ASSERT(XFS_BUF_ISDELAYWRITE(bp));
658 xfs_buf_delwri_promote(bp); 644 xfs_buf_delwri_promote(bp);
659 xfs_buf_relse(bp); 645 xfs_buf_relse(bp);
660} 646}
661 647
662/* ARGSUSED */
663STATIC void 648STATIC void
664xfs_buf_item_committing(xfs_buf_log_item_t *bip, xfs_lsn_t commit_lsn) 649xfs_buf_item_committing(
650 struct xfs_log_item *lip,
651 xfs_lsn_t commit_lsn)
665{ 652{
666} 653}
667 654
@@ -669,21 +656,16 @@ xfs_buf_item_committing(xfs_buf_log_item_t *bip, xfs_lsn_t commit_lsn)
669 * This is the ops vector shared by all buf log items. 656 * This is the ops vector shared by all buf log items.
670 */ 657 */
671static struct xfs_item_ops xfs_buf_item_ops = { 658static struct xfs_item_ops xfs_buf_item_ops = {
672 .iop_size = (uint(*)(xfs_log_item_t*))xfs_buf_item_size, 659 .iop_size = xfs_buf_item_size,
673 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 660 .iop_format = xfs_buf_item_format,
674 xfs_buf_item_format, 661 .iop_pin = xfs_buf_item_pin,
675 .iop_pin = (void(*)(xfs_log_item_t*))xfs_buf_item_pin, 662 .iop_unpin = xfs_buf_item_unpin,
676 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_buf_item_unpin, 663 .iop_trylock = xfs_buf_item_trylock,
677 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t *)) 664 .iop_unlock = xfs_buf_item_unlock,
678 xfs_buf_item_unpin_remove, 665 .iop_committed = xfs_buf_item_committed,
679 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_buf_item_trylock, 666 .iop_push = xfs_buf_item_push,
680 .iop_unlock = (void(*)(xfs_log_item_t*))xfs_buf_item_unlock, 667 .iop_pushbuf = xfs_buf_item_pushbuf,
681 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) 668 .iop_committing = xfs_buf_item_committing
682 xfs_buf_item_committed,
683 .iop_push = (void(*)(xfs_log_item_t*))xfs_buf_item_push,
684 .iop_pushbuf = (void(*)(xfs_log_item_t*))xfs_buf_item_pushbuf,
685 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
686 xfs_buf_item_committing
687}; 669};
688 670
689 671
@@ -712,7 +694,6 @@ xfs_buf_item_init(
712 */ 694 */
713 if (bp->b_mount != mp) 695 if (bp->b_mount != mp)
714 bp->b_mount = mp; 696 bp->b_mount = mp;
715 XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb);
716 if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { 697 if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
717 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 698 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
718 if (lip->li_type == XFS_LI_BUF) { 699 if (lip->li_type == XFS_LI_BUF) {
@@ -1098,15 +1079,14 @@ xfs_buf_error_relse(
1098 * It is called by xfs_buf_iodone_callbacks() above which will take 1079 * It is called by xfs_buf_iodone_callbacks() above which will take
1099 * care of cleaning up the buffer itself. 1080 * care of cleaning up the buffer itself.
1100 */ 1081 */
1101/* ARGSUSED */
1102void 1082void
1103xfs_buf_iodone( 1083xfs_buf_iodone(
1104 xfs_buf_t *bp, 1084 struct xfs_buf *bp,
1105 xfs_buf_log_item_t *bip) 1085 struct xfs_log_item *lip)
1106{ 1086{
1107 struct xfs_ail *ailp = bip->bli_item.li_ailp; 1087 struct xfs_ail *ailp = lip->li_ailp;
1108 1088
1109 ASSERT(bip->bli_buf == bp); 1089 ASSERT(BUF_ITEM(lip)->bli_buf == bp);
1110 1090
1111 xfs_buf_rele(bp); 1091 xfs_buf_rele(bp);
1112 1092
@@ -1120,6 +1100,6 @@ xfs_buf_iodone(
1120 * Either way, AIL is useless if we're forcing a shutdown. 1100 * Either way, AIL is useless if we're forcing a shutdown.
1121 */ 1101 */
1122 spin_lock(&ailp->xa_lock); 1102 spin_lock(&ailp->xa_lock);
1123 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip); 1103 xfs_trans_ail_delete(ailp, lip);
1124 xfs_buf_item_free(bip); 1104 xfs_buf_item_free(BUF_ITEM(lip));
1125} 1105}
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index f20bb472d582..0e2ed43f16c7 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -124,7 +124,7 @@ void xfs_buf_attach_iodone(struct xfs_buf *,
124 void(*)(struct xfs_buf *, xfs_log_item_t *), 124 void(*)(struct xfs_buf *, xfs_log_item_t *),
125 xfs_log_item_t *); 125 xfs_log_item_t *);
126void xfs_buf_iodone_callbacks(struct xfs_buf *); 126void xfs_buf_iodone_callbacks(struct xfs_buf *);
127void xfs_buf_iodone(struct xfs_buf *, xfs_buf_log_item_t *); 127void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
128 128
129#ifdef XFS_TRANS_DEBUG 129#ifdef XFS_TRANS_DEBUG
130void 130void
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 0ca556b4bf31..30fa0e206fba 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -25,19 +25,14 @@
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h" 27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 28#include "xfs_mount.h"
30#include "xfs_da_btree.h" 29#include "xfs_da_btree.h"
31#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h" 31#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h" 32#include "xfs_dinode.h"
37#include "xfs_inode.h" 33#include "xfs_inode.h"
38#include "xfs_inode_item.h" 34#include "xfs_inode_item.h"
39#include "xfs_alloc.h" 35#include "xfs_alloc.h"
40#include "xfs_btree.h"
41#include "xfs_bmap.h" 36#include "xfs_bmap.h"
42#include "xfs_attr.h" 37#include "xfs_attr.h"
43#include "xfs_attr_leaf.h" 38#include "xfs_attr_leaf.h"
@@ -581,16 +576,14 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
581 xfs_da_intnode_t *node; 576 xfs_da_intnode_t *node;
582 xfs_da_node_entry_t *btree; 577 xfs_da_node_entry_t *btree;
583 int tmp; 578 int tmp;
584 xfs_mount_t *mp;
585 579
586 node = oldblk->bp->data; 580 node = oldblk->bp->data;
587 mp = state->mp;
588 ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); 581 ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
589 ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count))); 582 ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count)));
590 ASSERT(newblk->blkno != 0); 583 ASSERT(newblk->blkno != 0);
591 if (state->args->whichfork == XFS_DATA_FORK) 584 if (state->args->whichfork == XFS_DATA_FORK)
592 ASSERT(newblk->blkno >= mp->m_dirleafblk && 585 ASSERT(newblk->blkno >= state->mp->m_dirleafblk &&
593 newblk->blkno < mp->m_dirfreeblk); 586 newblk->blkno < state->mp->m_dirfreeblk);
594 587
595 /* 588 /*
596 * We may need to make some room before we insert the new node. 589 * We may need to make some room before we insert the new node.
@@ -1601,7 +1594,7 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
1601 xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA| 1594 xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|
1602 XFS_BMAPI_CONTIG, 1595 XFS_BMAPI_CONTIG,
1603 args->firstblock, args->total, &map, &nmap, 1596 args->firstblock, args->total, &map, &nmap,
1604 args->flist, NULL))) { 1597 args->flist))) {
1605 return error; 1598 return error;
1606 } 1599 }
1607 ASSERT(nmap <= 1); 1600 ASSERT(nmap <= 1);
@@ -1622,8 +1615,7 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
1622 xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE| 1615 xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|
1623 XFS_BMAPI_METADATA, 1616 XFS_BMAPI_METADATA,
1624 args->firstblock, args->total, 1617 args->firstblock, args->total,
1625 &mapp[mapi], &nmap, args->flist, 1618 &mapp[mapi], &nmap, args->flist))) {
1626 NULL))) {
1627 kmem_free(mapp); 1619 kmem_free(mapp);
1628 return error; 1620 return error;
1629 } 1621 }
@@ -1884,7 +1876,7 @@ xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
1884 */ 1876 */
1885 if ((error = xfs_bunmapi(tp, dp, dead_blkno, count, 1877 if ((error = xfs_bunmapi(tp, dp, dead_blkno, count,
1886 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA, 1878 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
1887 0, args->firstblock, args->flist, NULL, 1879 0, args->firstblock, args->flist,
1888 &done)) == ENOSPC) { 1880 &done)) == ENOSPC) {
1889 if (w != XFS_DATA_FORK) 1881 if (w != XFS_DATA_FORK)
1890 break; 1882 break;
@@ -1989,7 +1981,7 @@ xfs_da_do_buf(
1989 nfsb, 1981 nfsb,
1990 XFS_BMAPI_METADATA | 1982 XFS_BMAPI_METADATA |
1991 xfs_bmapi_aflag(whichfork), 1983 xfs_bmapi_aflag(whichfork),
1992 NULL, 0, mapp, &nmap, NULL, NULL))) 1984 NULL, 0, mapp, &nmap, NULL)))
1993 goto exit0; 1985 goto exit0;
1994 } 1986 }
1995 } else { 1987 } else {
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 7f159d2a429a..3b9582c60a22 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -24,24 +24,15 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 29#include "xfs_dinode.h"
36#include "xfs_inode.h" 30#include "xfs_inode.h"
37#include "xfs_inode_item.h" 31#include "xfs_inode_item.h"
38#include "xfs_bmap.h" 32#include "xfs_bmap.h"
39#include "xfs_btree.h"
40#include "xfs_ialloc.h"
41#include "xfs_itable.h" 33#include "xfs_itable.h"
42#include "xfs_dfrag.h" 34#include "xfs_dfrag.h"
43#include "xfs_error.h" 35#include "xfs_error.h"
44#include "xfs_rw.h"
45#include "xfs_vnodeops.h" 36#include "xfs_vnodeops.h"
46#include "xfs_trace.h" 37#include "xfs_trace.h"
47 38
@@ -425,11 +416,8 @@ xfs_swap_extents(
425 } 416 }
426 417
427 418
428 IHOLD(ip); 419 xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
429 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 420 xfs_trans_ijoin_ref(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
430
431 IHOLD(tip);
432 xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
433 421
434 xfs_trans_log_inode(tp, ip, ilf_fields); 422 xfs_trans_log_inode(tp, ip, ilf_fields);
435 xfs_trans_log_inode(tp, tip, tilf_fields); 423 xfs_trans_log_inode(tp, tip, tilf_fields);
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 42520f041265..a1321bc7f192 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -25,13 +25,11 @@
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h" 27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 28#include "xfs_mount.h"
30#include "xfs_da_btree.h" 29#include "xfs_da_btree.h"
31#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 31#include "xfs_alloc_btree.h"
33#include "xfs_dir2_sf.h" 32#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 33#include "xfs_dinode.h"
36#include "xfs_inode.h" 34#include "xfs_inode.h"
37#include "xfs_inode_item.h" 35#include "xfs_inode_item.h"
@@ -382,7 +380,7 @@ xfs_readdir(
382 int rval; /* return value */ 380 int rval; /* return value */
383 int v; /* type-checking value */ 381 int v; /* type-checking value */
384 382
385 xfs_itrace_entry(dp); 383 trace_xfs_readdir(dp);
386 384
387 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 385 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
388 return XFS_ERROR(EIO); 386 return XFS_ERROR(EIO);
@@ -549,7 +547,7 @@ xfs_dir2_grow_inode(
549 if ((error = xfs_bmapi(tp, dp, bno, count, 547 if ((error = xfs_bmapi(tp, dp, bno, count,
550 XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG, 548 XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
551 args->firstblock, args->total, &map, &nmap, 549 args->firstblock, args->total, &map, &nmap,
552 args->flist, NULL))) 550 args->flist)))
553 return error; 551 return error;
554 ASSERT(nmap <= 1); 552 ASSERT(nmap <= 1);
555 if (nmap == 1) { 553 if (nmap == 1) {
@@ -581,8 +579,7 @@ xfs_dir2_grow_inode(
581 if ((error = xfs_bmapi(tp, dp, b, c, 579 if ((error = xfs_bmapi(tp, dp, b, c,
582 XFS_BMAPI_WRITE|XFS_BMAPI_METADATA, 580 XFS_BMAPI_WRITE|XFS_BMAPI_METADATA,
583 args->firstblock, args->total, 581 args->firstblock, args->total,
584 &mapp[mapi], &nmap, args->flist, 582 &mapp[mapi], &nmap, args->flist))) {
585 NULL))) {
586 kmem_free(mapp); 583 kmem_free(mapp);
587 return error; 584 return error;
588 } 585 }
@@ -715,7 +712,7 @@ xfs_dir2_shrink_inode(
715 */ 712 */
716 if ((error = xfs_bunmapi(tp, dp, da, mp->m_dirblkfsbs, 713 if ((error = xfs_bunmapi(tp, dp, da, mp->m_dirblkfsbs,
717 XFS_BMAPI_METADATA, 0, args->firstblock, args->flist, 714 XFS_BMAPI_METADATA, 0, args->firstblock, args->flist,
718 NULL, &done))) { 715 &done))) {
719 /* 716 /*
720 * ENOSPC actually can happen if we're in a removename with 717 * ENOSPC actually can happen if we're in a removename with
721 * no space reservation, and the resulting block removal 718 * no space reservation, and the resulting block removal
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 779a267b0a84..580d99cef9e7 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -24,12 +24,10 @@
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h" 26#include "xfs_dir2.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_dir2_sf.h" 30#include "xfs_dir2_sf.h"
32#include "xfs_attr_sf.h"
33#include "xfs_dinode.h" 31#include "xfs_dinode.h"
34#include "xfs_inode.h" 32#include "xfs_inode.h"
35#include "xfs_inode_item.h" 33#include "xfs_inode_item.h"
@@ -1073,10 +1071,10 @@ xfs_dir2_sf_to_block(
1073 */ 1071 */
1074 1072
1075 buf_len = dp->i_df.if_bytes; 1073 buf_len = dp->i_df.if_bytes;
1076 buf = kmem_alloc(dp->i_df.if_bytes, KM_SLEEP); 1074 buf = kmem_alloc(buf_len, KM_SLEEP);
1077 1075
1078 memcpy(buf, sfp, dp->i_df.if_bytes); 1076 memcpy(buf, sfp, buf_len);
1079 xfs_idata_realloc(dp, -dp->i_df.if_bytes, XFS_DATA_FORK); 1077 xfs_idata_realloc(dp, -buf_len, XFS_DATA_FORK);
1080 dp->i_d.di_size = 0; 1078 dp->i_d.di_size = 0;
1081 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 1079 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
1082 /* 1080 /*
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index 498f8d694330..921595b84f5b 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -24,12 +24,10 @@
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h" 26#include "xfs_dir2.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_dir2_sf.h" 30#include "xfs_dir2_sf.h"
32#include "xfs_attr_sf.h"
33#include "xfs_dinode.h" 31#include "xfs_dinode.h"
34#include "xfs_inode.h" 32#include "xfs_inode.h"
35#include "xfs_dir2_data.h" 33#include "xfs_dir2_data.h"
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index e2d89854ec9e..504be8640e91 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -25,11 +25,9 @@
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h" 27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 28#include "xfs_mount.h"
30#include "xfs_da_btree.h" 29#include "xfs_da_btree.h"
31#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
32#include "xfs_attr_sf.h"
33#include "xfs_dir2_sf.h" 31#include "xfs_dir2_sf.h"
34#include "xfs_dinode.h" 32#include "xfs_dinode.h"
35#include "xfs_inode.h" 33#include "xfs_inode.h"
@@ -875,7 +873,7 @@ xfs_dir2_leaf_getdents(
875 xfs_dir2_byte_to_da(mp, 873 xfs_dir2_byte_to_da(mp,
876 XFS_DIR2_LEAF_OFFSET) - map_off, 874 XFS_DIR2_LEAF_OFFSET) - map_off,
877 XFS_BMAPI_METADATA, NULL, 0, 875 XFS_BMAPI_METADATA, NULL, 0,
878 &map[map_valid], &nmap, NULL, NULL); 876 &map[map_valid], &nmap, NULL);
879 /* 877 /*
880 * Don't know if we should ignore this or 878 * Don't know if we should ignore this or
881 * try to return an error. 879 * try to return an error.
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 78fc4d9ae756..f9a0864b696a 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -24,12 +24,10 @@
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h" 26#include "xfs_dir2.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_dir2_sf.h" 30#include "xfs_dir2_sf.h"
32#include "xfs_attr_sf.h"
33#include "xfs_dinode.h" 31#include "xfs_dinode.h"
34#include "xfs_inode.h" 32#include "xfs_inode.h"
35#include "xfs_bmap.h" 33#include "xfs_bmap.h"
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index c1a5945d463a..b1bae6b1eed9 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -24,12 +24,10 @@
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h" 26#include "xfs_dir2.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_dir2_sf.h" 30#include "xfs_dir2_sf.h"
32#include "xfs_attr_sf.h"
33#include "xfs_dinode.h" 31#include "xfs_dinode.h"
34#include "xfs_inode.h" 32#include "xfs_inode.h"
35#include "xfs_inode_item.h" 33#include "xfs_inode_item.h"
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
deleted file mode 100644
index 2813cdd72375..000000000000
--- a/fs/xfs/xfs_dmapi.h
+++ /dev/null
@@ -1,170 +0,0 @@
1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_DMAPI_H__
19#define __XFS_DMAPI_H__
20
21/* Values used to define the on-disk version of dm_attrname_t. All
22 * on-disk attribute names start with the 8-byte string "SGI_DMI_".
23 *
24 * In the on-disk inode, DMAPI attribute names consist of the user-provided
25 * name with the DMATTR_PREFIXSTRING pre-pended. This string must NEVER be
26 * changed.
27 */
28
29#define DMATTR_PREFIXLEN 8
30#define DMATTR_PREFIXSTRING "SGI_DMI_"
31
32typedef enum {
33 DM_EVENT_INVALID = -1,
34 DM_EVENT_CANCEL = 0, /* not supported */
35 DM_EVENT_MOUNT = 1,
36 DM_EVENT_PREUNMOUNT = 2,
37 DM_EVENT_UNMOUNT = 3,
38 DM_EVENT_DEBUT = 4, /* not supported */
39 DM_EVENT_CREATE = 5,
40 DM_EVENT_CLOSE = 6, /* not supported */
41 DM_EVENT_POSTCREATE = 7,
42 DM_EVENT_REMOVE = 8,
43 DM_EVENT_POSTREMOVE = 9,
44 DM_EVENT_RENAME = 10,
45 DM_EVENT_POSTRENAME = 11,
46 DM_EVENT_LINK = 12,
47 DM_EVENT_POSTLINK = 13,
48 DM_EVENT_SYMLINK = 14,
49 DM_EVENT_POSTSYMLINK = 15,
50 DM_EVENT_READ = 16,
51 DM_EVENT_WRITE = 17,
52 DM_EVENT_TRUNCATE = 18,
53 DM_EVENT_ATTRIBUTE = 19,
54 DM_EVENT_DESTROY = 20,
55 DM_EVENT_NOSPACE = 21,
56 DM_EVENT_USER = 22,
57 DM_EVENT_MAX = 23
58} dm_eventtype_t;
59#define HAVE_DM_EVENTTYPE_T
60
61typedef enum {
62 DM_RIGHT_NULL,
63 DM_RIGHT_SHARED,
64 DM_RIGHT_EXCL
65} dm_right_t;
66#define HAVE_DM_RIGHT_T
67
68/* Defines for determining if an event message should be sent. */
69#ifdef HAVE_DMAPI
70#define DM_EVENT_ENABLED(ip, event) ( \
71 unlikely ((ip)->i_mount->m_flags & XFS_MOUNT_DMAPI) && \
72 ( ((ip)->i_d.di_dmevmask & (1 << event)) || \
73 ((ip)->i_mount->m_dmevmask & (1 << event)) ) \
74 )
75#else
76#define DM_EVENT_ENABLED(ip, event) (0)
77#endif
78
79#define DM_XFS_VALID_FS_EVENTS ( \
80 (1 << DM_EVENT_PREUNMOUNT) | \
81 (1 << DM_EVENT_UNMOUNT) | \
82 (1 << DM_EVENT_NOSPACE) | \
83 (1 << DM_EVENT_DEBUT) | \
84 (1 << DM_EVENT_CREATE) | \
85 (1 << DM_EVENT_POSTCREATE) | \
86 (1 << DM_EVENT_REMOVE) | \
87 (1 << DM_EVENT_POSTREMOVE) | \
88 (1 << DM_EVENT_RENAME) | \
89 (1 << DM_EVENT_POSTRENAME) | \
90 (1 << DM_EVENT_LINK) | \
91 (1 << DM_EVENT_POSTLINK) | \
92 (1 << DM_EVENT_SYMLINK) | \
93 (1 << DM_EVENT_POSTSYMLINK) | \
94 (1 << DM_EVENT_ATTRIBUTE) | \
95 (1 << DM_EVENT_DESTROY) )
96
97/* Events valid in dm_set_eventlist() when called with a file handle for
98 a regular file or a symlink. These events are persistent.
99*/
100
101#define DM_XFS_VALID_FILE_EVENTS ( \
102 (1 << DM_EVENT_ATTRIBUTE) | \
103 (1 << DM_EVENT_DESTROY) )
104
105/* Events valid in dm_set_eventlist() when called with a file handle for
106 a directory. These events are persistent.
107*/
108
109#define DM_XFS_VALID_DIRECTORY_EVENTS ( \
110 (1 << DM_EVENT_CREATE) | \
111 (1 << DM_EVENT_POSTCREATE) | \
112 (1 << DM_EVENT_REMOVE) | \
113 (1 << DM_EVENT_POSTREMOVE) | \
114 (1 << DM_EVENT_RENAME) | \
115 (1 << DM_EVENT_POSTRENAME) | \
116 (1 << DM_EVENT_LINK) | \
117 (1 << DM_EVENT_POSTLINK) | \
118 (1 << DM_EVENT_SYMLINK) | \
119 (1 << DM_EVENT_POSTSYMLINK) | \
120 (1 << DM_EVENT_ATTRIBUTE) | \
121 (1 << DM_EVENT_DESTROY) )
122
123/* Events supported by the XFS filesystem. */
124#define DM_XFS_SUPPORTED_EVENTS ( \
125 (1 << DM_EVENT_MOUNT) | \
126 (1 << DM_EVENT_PREUNMOUNT) | \
127 (1 << DM_EVENT_UNMOUNT) | \
128 (1 << DM_EVENT_NOSPACE) | \
129 (1 << DM_EVENT_CREATE) | \
130 (1 << DM_EVENT_POSTCREATE) | \
131 (1 << DM_EVENT_REMOVE) | \
132 (1 << DM_EVENT_POSTREMOVE) | \
133 (1 << DM_EVENT_RENAME) | \
134 (1 << DM_EVENT_POSTRENAME) | \
135 (1 << DM_EVENT_LINK) | \
136 (1 << DM_EVENT_POSTLINK) | \
137 (1 << DM_EVENT_SYMLINK) | \
138 (1 << DM_EVENT_POSTSYMLINK) | \
139 (1 << DM_EVENT_READ) | \
140 (1 << DM_EVENT_WRITE) | \
141 (1 << DM_EVENT_TRUNCATE) | \
142 (1 << DM_EVENT_ATTRIBUTE) | \
143 (1 << DM_EVENT_DESTROY) )
144
145
146/*
147 * Definitions used for the flags field on dm_send_*_event().
148 */
149
150#define DM_FLAGS_NDELAY 0x001 /* return EAGAIN after dm_pending() */
151#define DM_FLAGS_UNWANTED 0x002 /* event not in fsys dm_eventset_t */
152#define DM_FLAGS_IMUX 0x004 /* thread holds i_mutex */
153#define DM_FLAGS_IALLOCSEM_RD 0x010 /* thread holds i_alloc_sem rd */
154#define DM_FLAGS_IALLOCSEM_WR 0x020 /* thread holds i_alloc_sem wr */
155
156/*
157 * Pull in platform specific event flags defines
158 */
159#include "xfs_dmapi_priv.h"
160
161/*
162 * Macros to turn caller specified delay/block flags into
163 * dm_send_xxxx_event flag DM_FLAGS_NDELAY.
164 */
165
166#define FILP_DELAY_FLAG(filp) ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) ? \
167 DM_FLAGS_NDELAY : 0)
168#define AT_DELAY_FLAG(f) ((f & XFS_ATTR_NONBLOCK) ? DM_FLAGS_NDELAY : 0)
169
170#endif /* __XFS_DMAPI_H__ */
diff --git a/fs/xfs/xfs_dmops.c b/fs/xfs/xfs_dmops.c
deleted file mode 100644
index e71e2581c0c3..000000000000
--- a/fs/xfs/xfs_dmops.c
+++ /dev/null
@@ -1,55 +0,0 @@
1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_log.h"
22#include "xfs_trans.h"
23#include "xfs_sb.h"
24#include "xfs_dmapi.h"
25#include "xfs_inum.h"
26#include "xfs_ag.h"
27#include "xfs_mount.h"
28
29
30static struct xfs_dmops xfs_dmcore_stub = {
31 .xfs_send_data = (xfs_send_data_t)fs_nosys,
32 .xfs_send_mmap = (xfs_send_mmap_t)fs_noerr,
33 .xfs_send_destroy = (xfs_send_destroy_t)fs_nosys,
34 .xfs_send_namesp = (xfs_send_namesp_t)fs_nosys,
35 .xfs_send_mount = (xfs_send_mount_t)fs_nosys,
36 .xfs_send_unmount = (xfs_send_unmount_t)fs_noerr,
37};
38
39int
40xfs_dmops_get(struct xfs_mount *mp)
41{
42 if (mp->m_flags & XFS_MOUNT_DMAPI) {
43 cmn_err(CE_WARN,
44 "XFS: dmapi support not available in this kernel.");
45 return EINVAL;
46 }
47
48 mp->m_dm_ops = &xfs_dmcore_stub;
49 return 0;
50}
51
52void
53xfs_dmops_put(struct xfs_mount *mp)
54{
55}
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 047b8a8e5c29..ed9990267661 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -23,12 +23,8 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 26#include "xfs_mount.h"
29#include "xfs_bmap_btree.h" 27#include "xfs_bmap_btree.h"
30#include "xfs_dir2_sf.h"
31#include "xfs_attr_sf.h"
32#include "xfs_dinode.h" 28#include "xfs_dinode.h"
33#include "xfs_inode.h" 29#include "xfs_inode.h"
34#include "xfs_utils.h" 30#include "xfs_utils.h"
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 409fe81585fd..a55e687bf562 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -24,7 +24,6 @@
24#include "xfs_buf_item.h" 24#include "xfs_buf_item.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_trans_priv.h" 28#include "xfs_trans_priv.h"
30#include "xfs_extfree_item.h" 29#include "xfs_extfree_item.h"
@@ -33,18 +32,19 @@
33kmem_zone_t *xfs_efi_zone; 32kmem_zone_t *xfs_efi_zone;
34kmem_zone_t *xfs_efd_zone; 33kmem_zone_t *xfs_efd_zone;
35 34
36STATIC void xfs_efi_item_unlock(xfs_efi_log_item_t *); 35static inline struct xfs_efi_log_item *EFI_ITEM(struct xfs_log_item *lip)
36{
37 return container_of(lip, struct xfs_efi_log_item, efi_item);
38}
37 39
38void 40void
39xfs_efi_item_free(xfs_efi_log_item_t *efip) 41xfs_efi_item_free(
42 struct xfs_efi_log_item *efip)
40{ 43{
41 int nexts = efip->efi_format.efi_nextents; 44 if (efip->efi_format.efi_nextents > XFS_EFI_MAX_FAST_EXTENTS)
42
43 if (nexts > XFS_EFI_MAX_FAST_EXTENTS) {
44 kmem_free(efip); 45 kmem_free(efip);
45 } else { 46 else
46 kmem_zone_free(xfs_efi_zone, efip); 47 kmem_zone_free(xfs_efi_zone, efip);
47 }
48} 48}
49 49
50/* 50/*
@@ -52,9 +52,9 @@ xfs_efi_item_free(xfs_efi_log_item_t *efip)
52 * We only need 1 iovec for an efi item. It just logs the efi_log_format 52 * We only need 1 iovec for an efi item. It just logs the efi_log_format
53 * structure. 53 * structure.
54 */ 54 */
55/*ARGSUSED*/
56STATIC uint 55STATIC uint
57xfs_efi_item_size(xfs_efi_log_item_t *efip) 56xfs_efi_item_size(
57 struct xfs_log_item *lip)
58{ 58{
59 return 1; 59 return 1;
60} 60}
@@ -67,10 +67,12 @@ xfs_efi_item_size(xfs_efi_log_item_t *efip)
67 * slots in the efi item have been filled. 67 * slots in the efi item have been filled.
68 */ 68 */
69STATIC void 69STATIC void
70xfs_efi_item_format(xfs_efi_log_item_t *efip, 70xfs_efi_item_format(
71 xfs_log_iovec_t *log_vector) 71 struct xfs_log_item *lip,
72 struct xfs_log_iovec *log_vector)
72{ 73{
73 uint size; 74 struct xfs_efi_log_item *efip = EFI_ITEM(lip);
75 uint size;
74 76
75 ASSERT(efip->efi_next_extent == efip->efi_format.efi_nextents); 77 ASSERT(efip->efi_next_extent == efip->efi_format.efi_nextents);
76 78
@@ -80,7 +82,7 @@ xfs_efi_item_format(xfs_efi_log_item_t *efip,
80 size += (efip->efi_format.efi_nextents - 1) * sizeof(xfs_extent_t); 82 size += (efip->efi_format.efi_nextents - 1) * sizeof(xfs_extent_t);
81 efip->efi_format.efi_size = 1; 83 efip->efi_format.efi_size = 1;
82 84
83 log_vector->i_addr = (xfs_caddr_t)&(efip->efi_format); 85 log_vector->i_addr = &efip->efi_format;
84 log_vector->i_len = size; 86 log_vector->i_len = size;
85 log_vector->i_type = XLOG_REG_TYPE_EFI_FORMAT; 87 log_vector->i_type = XLOG_REG_TYPE_EFI_FORMAT;
86 ASSERT(size >= sizeof(xfs_efi_log_format_t)); 88 ASSERT(size >= sizeof(xfs_efi_log_format_t));
@@ -90,60 +92,33 @@ xfs_efi_item_format(xfs_efi_log_item_t *efip,
90/* 92/*
91 * Pinning has no meaning for an efi item, so just return. 93 * Pinning has no meaning for an efi item, so just return.
92 */ 94 */
93/*ARGSUSED*/
94STATIC void 95STATIC void
95xfs_efi_item_pin(xfs_efi_log_item_t *efip) 96xfs_efi_item_pin(
97 struct xfs_log_item *lip)
96{ 98{
97 return;
98} 99}
99 100
100
101/* 101/*
102 * While EFIs cannot really be pinned, the unpin operation is the 102 * While EFIs cannot really be pinned, the unpin operation is the
103 * last place at which the EFI is manipulated during a transaction. 103 * last place at which the EFI is manipulated during a transaction.
104 * Here we coordinate with xfs_efi_cancel() to determine who gets to 104 * Here we coordinate with xfs_efi_cancel() to determine who gets to
105 * free the EFI. 105 * free the EFI.
106 */ 106 */
107/*ARGSUSED*/
108STATIC void
109xfs_efi_item_unpin(xfs_efi_log_item_t *efip)
110{
111 struct xfs_ail *ailp = efip->efi_item.li_ailp;
112
113 spin_lock(&ailp->xa_lock);
114 if (efip->efi_flags & XFS_EFI_CANCELED) {
115 /* xfs_trans_ail_delete() drops the AIL lock. */
116 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip);
117 xfs_efi_item_free(efip);
118 } else {
119 efip->efi_flags |= XFS_EFI_COMMITTED;
120 spin_unlock(&ailp->xa_lock);
121 }
122}
123
124/*
125 * like unpin only we have to also clear the xaction descriptor
126 * pointing the log item if we free the item. This routine duplicates
127 * unpin because efi_flags is protected by the AIL lock. Freeing
128 * the descriptor and then calling unpin would force us to drop the AIL
129 * lock which would open up a race condition.
130 */
131STATIC void 107STATIC void
132xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp) 108xfs_efi_item_unpin(
109 struct xfs_log_item *lip,
110 int remove)
133{ 111{
134 struct xfs_ail *ailp = efip->efi_item.li_ailp; 112 struct xfs_efi_log_item *efip = EFI_ITEM(lip);
135 xfs_log_item_desc_t *lidp; 113 struct xfs_ail *ailp = lip->li_ailp;
136 114
137 spin_lock(&ailp->xa_lock); 115 spin_lock(&ailp->xa_lock);
138 if (efip->efi_flags & XFS_EFI_CANCELED) { 116 if (efip->efi_flags & XFS_EFI_CANCELED) {
139 /* 117 if (remove)
140 * free the xaction descriptor pointing to this item 118 xfs_trans_del_item(lip);
141 */
142 lidp = xfs_trans_find_item(tp, (xfs_log_item_t *) efip);
143 xfs_trans_free_item(tp, lidp);
144 119
145 /* xfs_trans_ail_delete() drops the AIL lock. */ 120 /* xfs_trans_ail_delete() drops the AIL lock. */
146 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip); 121 xfs_trans_ail_delete(ailp, lip);
147 xfs_efi_item_free(efip); 122 xfs_efi_item_free(efip);
148 } else { 123 } else {
149 efip->efi_flags |= XFS_EFI_COMMITTED; 124 efip->efi_flags |= XFS_EFI_COMMITTED;
@@ -158,9 +133,9 @@ xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp)
158 * XFS_ITEM_PINNED so that the caller will eventually flush the log. 133 * XFS_ITEM_PINNED so that the caller will eventually flush the log.
159 * This should help in getting the EFI out of the AIL. 134 * This should help in getting the EFI out of the AIL.
160 */ 135 */
161/*ARGSUSED*/
162STATIC uint 136STATIC uint
163xfs_efi_item_trylock(xfs_efi_log_item_t *efip) 137xfs_efi_item_trylock(
138 struct xfs_log_item *lip)
164{ 139{
165 return XFS_ITEM_PINNED; 140 return XFS_ITEM_PINNED;
166} 141}
@@ -168,13 +143,12 @@ xfs_efi_item_trylock(xfs_efi_log_item_t *efip)
168/* 143/*
169 * Efi items have no locking, so just return. 144 * Efi items have no locking, so just return.
170 */ 145 */
171/*ARGSUSED*/
172STATIC void 146STATIC void
173xfs_efi_item_unlock(xfs_efi_log_item_t *efip) 147xfs_efi_item_unlock(
148 struct xfs_log_item *lip)
174{ 149{
175 if (efip->efi_item.li_flags & XFS_LI_ABORTED) 150 if (lip->li_flags & XFS_LI_ABORTED)
176 xfs_efi_item_free(efip); 151 xfs_efi_item_free(EFI_ITEM(lip));
177 return;
178} 152}
179 153
180/* 154/*
@@ -183,9 +157,10 @@ xfs_efi_item_unlock(xfs_efi_log_item_t *efip)
183 * flag is not paid any attention here. Checking for that is delayed 157 * flag is not paid any attention here. Checking for that is delayed
184 * until the EFI is unpinned. 158 * until the EFI is unpinned.
185 */ 159 */
186/*ARGSUSED*/
187STATIC xfs_lsn_t 160STATIC xfs_lsn_t
188xfs_efi_item_committed(xfs_efi_log_item_t *efip, xfs_lsn_t lsn) 161xfs_efi_item_committed(
162 struct xfs_log_item *lip,
163 xfs_lsn_t lsn)
189{ 164{
190 return lsn; 165 return lsn;
191} 166}
@@ -195,11 +170,10 @@ xfs_efi_item_committed(xfs_efi_log_item_t *efip, xfs_lsn_t lsn)
195 * stuck waiting for all of its corresponding efd items to be 170 * stuck waiting for all of its corresponding efd items to be
196 * committed to disk. 171 * committed to disk.
197 */ 172 */
198/*ARGSUSED*/
199STATIC void 173STATIC void
200xfs_efi_item_push(xfs_efi_log_item_t *efip) 174xfs_efi_item_push(
175 struct xfs_log_item *lip)
201{ 176{
202 return;
203} 177}
204 178
205/* 179/*
@@ -209,61 +183,55 @@ xfs_efi_item_push(xfs_efi_log_item_t *efip)
209 * example, for inodes, the inode is locked throughout the extent freeing 183 * example, for inodes, the inode is locked throughout the extent freeing
210 * so the dependency should be recorded there. 184 * so the dependency should be recorded there.
211 */ 185 */
212/*ARGSUSED*/
213STATIC void 186STATIC void
214xfs_efi_item_committing(xfs_efi_log_item_t *efip, xfs_lsn_t lsn) 187xfs_efi_item_committing(
188 struct xfs_log_item *lip,
189 xfs_lsn_t lsn)
215{ 190{
216 return;
217} 191}
218 192
219/* 193/*
220 * This is the ops vector shared by all efi log items. 194 * This is the ops vector shared by all efi log items.
221 */ 195 */
222static struct xfs_item_ops xfs_efi_item_ops = { 196static struct xfs_item_ops xfs_efi_item_ops = {
223 .iop_size = (uint(*)(xfs_log_item_t*))xfs_efi_item_size, 197 .iop_size = xfs_efi_item_size,
224 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 198 .iop_format = xfs_efi_item_format,
225 xfs_efi_item_format, 199 .iop_pin = xfs_efi_item_pin,
226 .iop_pin = (void(*)(xfs_log_item_t*))xfs_efi_item_pin, 200 .iop_unpin = xfs_efi_item_unpin,
227 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_efi_item_unpin, 201 .iop_trylock = xfs_efi_item_trylock,
228 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t *)) 202 .iop_unlock = xfs_efi_item_unlock,
229 xfs_efi_item_unpin_remove, 203 .iop_committed = xfs_efi_item_committed,
230 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_efi_item_trylock, 204 .iop_push = xfs_efi_item_push,
231 .iop_unlock = (void(*)(xfs_log_item_t*))xfs_efi_item_unlock, 205 .iop_committing = xfs_efi_item_committing
232 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
233 xfs_efi_item_committed,
234 .iop_push = (void(*)(xfs_log_item_t*))xfs_efi_item_push,
235 .iop_pushbuf = NULL,
236 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
237 xfs_efi_item_committing
238}; 206};
239 207
240 208
241/* 209/*
242 * Allocate and initialize an efi item with the given number of extents. 210 * Allocate and initialize an efi item with the given number of extents.
243 */ 211 */
244xfs_efi_log_item_t * 212struct xfs_efi_log_item *
245xfs_efi_init(xfs_mount_t *mp, 213xfs_efi_init(
246 uint nextents) 214 struct xfs_mount *mp,
215 uint nextents)
247 216
248{ 217{
249 xfs_efi_log_item_t *efip; 218 struct xfs_efi_log_item *efip;
250 uint size; 219 uint size;
251 220
252 ASSERT(nextents > 0); 221 ASSERT(nextents > 0);
253 if (nextents > XFS_EFI_MAX_FAST_EXTENTS) { 222 if (nextents > XFS_EFI_MAX_FAST_EXTENTS) {
254 size = (uint)(sizeof(xfs_efi_log_item_t) + 223 size = (uint)(sizeof(xfs_efi_log_item_t) +
255 ((nextents - 1) * sizeof(xfs_extent_t))); 224 ((nextents - 1) * sizeof(xfs_extent_t)));
256 efip = (xfs_efi_log_item_t*)kmem_zalloc(size, KM_SLEEP); 225 efip = kmem_zalloc(size, KM_SLEEP);
257 } else { 226 } else {
258 efip = (xfs_efi_log_item_t*)kmem_zone_zalloc(xfs_efi_zone, 227 efip = kmem_zone_zalloc(xfs_efi_zone, KM_SLEEP);
259 KM_SLEEP);
260 } 228 }
261 229
262 xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops); 230 xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops);
263 efip->efi_format.efi_nextents = nextents; 231 efip->efi_format.efi_nextents = nextents;
264 efip->efi_format.efi_id = (__psint_t)(void*)efip; 232 efip->efi_format.efi_id = (__psint_t)(void*)efip;
265 233
266 return (efip); 234 return efip;
267} 235}
268 236
269/* 237/*
@@ -276,7 +244,7 @@ xfs_efi_init(xfs_mount_t *mp,
276int 244int
277xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) 245xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
278{ 246{
279 xfs_efi_log_format_t *src_efi_fmt = (xfs_efi_log_format_t *)buf->i_addr; 247 xfs_efi_log_format_t *src_efi_fmt = buf->i_addr;
280 uint i; 248 uint i;
281 uint len = sizeof(xfs_efi_log_format_t) + 249 uint len = sizeof(xfs_efi_log_format_t) +
282 (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_t); 250 (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_t);
@@ -289,8 +257,7 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
289 memcpy((char *)dst_efi_fmt, (char*)src_efi_fmt, len); 257 memcpy((char *)dst_efi_fmt, (char*)src_efi_fmt, len);
290 return 0; 258 return 0;
291 } else if (buf->i_len == len32) { 259 } else if (buf->i_len == len32) {
292 xfs_efi_log_format_32_t *src_efi_fmt_32 = 260 xfs_efi_log_format_32_t *src_efi_fmt_32 = buf->i_addr;
293 (xfs_efi_log_format_32_t *)buf->i_addr;
294 261
295 dst_efi_fmt->efi_type = src_efi_fmt_32->efi_type; 262 dst_efi_fmt->efi_type = src_efi_fmt_32->efi_type;
296 dst_efi_fmt->efi_size = src_efi_fmt_32->efi_size; 263 dst_efi_fmt->efi_size = src_efi_fmt_32->efi_size;
@@ -304,8 +271,7 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
304 } 271 }
305 return 0; 272 return 0;
306 } else if (buf->i_len == len64) { 273 } else if (buf->i_len == len64) {
307 xfs_efi_log_format_64_t *src_efi_fmt_64 = 274 xfs_efi_log_format_64_t *src_efi_fmt_64 = buf->i_addr;
308 (xfs_efi_log_format_64_t *)buf->i_addr;
309 275
310 dst_efi_fmt->efi_type = src_efi_fmt_64->efi_type; 276 dst_efi_fmt->efi_type = src_efi_fmt_64->efi_type;
311 dst_efi_fmt->efi_size = src_efi_fmt_64->efi_size; 277 dst_efi_fmt->efi_size = src_efi_fmt_64->efi_size;
@@ -356,16 +322,18 @@ xfs_efi_release(xfs_efi_log_item_t *efip,
356 } 322 }
357} 323}
358 324
359STATIC void 325static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip)
360xfs_efd_item_free(xfs_efd_log_item_t *efdp)
361{ 326{
362 int nexts = efdp->efd_format.efd_nextents; 327 return container_of(lip, struct xfs_efd_log_item, efd_item);
328}
363 329
364 if (nexts > XFS_EFD_MAX_FAST_EXTENTS) { 330STATIC void
331xfs_efd_item_free(struct xfs_efd_log_item *efdp)
332{
333 if (efdp->efd_format.efd_nextents > XFS_EFD_MAX_FAST_EXTENTS)
365 kmem_free(efdp); 334 kmem_free(efdp);
366 } else { 335 else
367 kmem_zone_free(xfs_efd_zone, efdp); 336 kmem_zone_free(xfs_efd_zone, efdp);
368 }
369} 337}
370 338
371/* 339/*
@@ -373,9 +341,9 @@ xfs_efd_item_free(xfs_efd_log_item_t *efdp)
373 * We only need 1 iovec for an efd item. It just logs the efd_log_format 341 * We only need 1 iovec for an efd item. It just logs the efd_log_format
374 * structure. 342 * structure.
375 */ 343 */
376/*ARGSUSED*/
377STATIC uint 344STATIC uint
378xfs_efd_item_size(xfs_efd_log_item_t *efdp) 345xfs_efd_item_size(
346 struct xfs_log_item *lip)
379{ 347{
380 return 1; 348 return 1;
381} 349}
@@ -388,10 +356,12 @@ xfs_efd_item_size(xfs_efd_log_item_t *efdp)
388 * slots in the efd item have been filled. 356 * slots in the efd item have been filled.
389 */ 357 */
390STATIC void 358STATIC void
391xfs_efd_item_format(xfs_efd_log_item_t *efdp, 359xfs_efd_item_format(
392 xfs_log_iovec_t *log_vector) 360 struct xfs_log_item *lip,
361 struct xfs_log_iovec *log_vector)
393{ 362{
394 uint size; 363 struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
364 uint size;
395 365
396 ASSERT(efdp->efd_next_extent == efdp->efd_format.efd_nextents); 366 ASSERT(efdp->efd_next_extent == efdp->efd_format.efd_nextents);
397 367
@@ -401,48 +371,38 @@ xfs_efd_item_format(xfs_efd_log_item_t *efdp,
401 size += (efdp->efd_format.efd_nextents - 1) * sizeof(xfs_extent_t); 371 size += (efdp->efd_format.efd_nextents - 1) * sizeof(xfs_extent_t);
402 efdp->efd_format.efd_size = 1; 372 efdp->efd_format.efd_size = 1;
403 373
404 log_vector->i_addr = (xfs_caddr_t)&(efdp->efd_format); 374 log_vector->i_addr = &efdp->efd_format;
405 log_vector->i_len = size; 375 log_vector->i_len = size;
406 log_vector->i_type = XLOG_REG_TYPE_EFD_FORMAT; 376 log_vector->i_type = XLOG_REG_TYPE_EFD_FORMAT;
407 ASSERT(size >= sizeof(xfs_efd_log_format_t)); 377 ASSERT(size >= sizeof(xfs_efd_log_format_t));
408} 378}
409 379
410
411/* 380/*
412 * Pinning has no meaning for an efd item, so just return. 381 * Pinning has no meaning for an efd item, so just return.
413 */ 382 */
414/*ARGSUSED*/
415STATIC void 383STATIC void
416xfs_efd_item_pin(xfs_efd_log_item_t *efdp) 384xfs_efd_item_pin(
385 struct xfs_log_item *lip)
417{ 386{
418 return;
419} 387}
420 388
421
422/* 389/*
423 * Since pinning has no meaning for an efd item, unpinning does 390 * Since pinning has no meaning for an efd item, unpinning does
424 * not either. 391 * not either.
425 */ 392 */
426/*ARGSUSED*/
427STATIC void
428xfs_efd_item_unpin(xfs_efd_log_item_t *efdp)
429{
430 return;
431}
432
433/*ARGSUSED*/
434STATIC void 393STATIC void
435xfs_efd_item_unpin_remove(xfs_efd_log_item_t *efdp, xfs_trans_t *tp) 394xfs_efd_item_unpin(
395 struct xfs_log_item *lip,
396 int remove)
436{ 397{
437 return;
438} 398}
439 399
440/* 400/*
441 * Efd items have no locking, so just return success. 401 * Efd items have no locking, so just return success.
442 */ 402 */
443/*ARGSUSED*/
444STATIC uint 403STATIC uint
445xfs_efd_item_trylock(xfs_efd_log_item_t *efdp) 404xfs_efd_item_trylock(
405 struct xfs_log_item *lip)
446{ 406{
447 return XFS_ITEM_LOCKED; 407 return XFS_ITEM_LOCKED;
448} 408}
@@ -451,13 +411,12 @@ xfs_efd_item_trylock(xfs_efd_log_item_t *efdp)
451 * Efd items have no locking or pushing, so return failure 411 * Efd items have no locking or pushing, so return failure
452 * so that the caller doesn't bother with us. 412 * so that the caller doesn't bother with us.
453 */ 413 */
454/*ARGSUSED*/
455STATIC void 414STATIC void
456xfs_efd_item_unlock(xfs_efd_log_item_t *efdp) 415xfs_efd_item_unlock(
416 struct xfs_log_item *lip)
457{ 417{
458 if (efdp->efd_item.li_flags & XFS_LI_ABORTED) 418 if (lip->li_flags & XFS_LI_ABORTED)
459 xfs_efd_item_free(efdp); 419 xfs_efd_item_free(EFD_ITEM(lip));
460 return;
461} 420}
462 421
463/* 422/*
@@ -467,15 +426,18 @@ xfs_efd_item_unlock(xfs_efd_log_item_t *efdp)
467 * return -1 to keep the transaction code from further referencing 426 * return -1 to keep the transaction code from further referencing
468 * this item. 427 * this item.
469 */ 428 */
470/*ARGSUSED*/
471STATIC xfs_lsn_t 429STATIC xfs_lsn_t
472xfs_efd_item_committed(xfs_efd_log_item_t *efdp, xfs_lsn_t lsn) 430xfs_efd_item_committed(
431 struct xfs_log_item *lip,
432 xfs_lsn_t lsn)
473{ 433{
434 struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
435
474 /* 436 /*
475 * If we got a log I/O error, it's always the case that the LR with the 437 * If we got a log I/O error, it's always the case that the LR with the
476 * EFI got unpinned and freed before the EFD got aborted. 438 * EFI got unpinned and freed before the EFD got aborted.
477 */ 439 */
478 if ((efdp->efd_item.li_flags & XFS_LI_ABORTED) == 0) 440 if (!(lip->li_flags & XFS_LI_ABORTED))
479 xfs_efi_release(efdp->efd_efip, efdp->efd_format.efd_nextents); 441 xfs_efi_release(efdp->efd_efip, efdp->efd_format.efd_nextents);
480 442
481 xfs_efd_item_free(efdp); 443 xfs_efd_item_free(efdp);
@@ -486,11 +448,10 @@ xfs_efd_item_committed(xfs_efd_log_item_t *efdp, xfs_lsn_t lsn)
486 * There isn't much you can do to push on an efd item. It is simply 448 * There isn't much you can do to push on an efd item. It is simply
487 * stuck waiting for the log to be flushed to disk. 449 * stuck waiting for the log to be flushed to disk.
488 */ 450 */
489/*ARGSUSED*/
490STATIC void 451STATIC void
491xfs_efd_item_push(xfs_efd_log_item_t *efdp) 452xfs_efd_item_push(
453 struct xfs_log_item *lip)
492{ 454{
493 return;
494} 455}
495 456
496/* 457/*
@@ -500,55 +461,48 @@ xfs_efd_item_push(xfs_efd_log_item_t *efdp)
500 * example, for inodes, the inode is locked throughout the extent freeing 461 * example, for inodes, the inode is locked throughout the extent freeing
501 * so the dependency should be recorded there. 462 * so the dependency should be recorded there.
502 */ 463 */
503/*ARGSUSED*/
504STATIC void 464STATIC void
505xfs_efd_item_committing(xfs_efd_log_item_t *efip, xfs_lsn_t lsn) 465xfs_efd_item_committing(
466 struct xfs_log_item *lip,
467 xfs_lsn_t lsn)
506{ 468{
507 return;
508} 469}
509 470
510/* 471/*
511 * This is the ops vector shared by all efd log items. 472 * This is the ops vector shared by all efd log items.
512 */ 473 */
513static struct xfs_item_ops xfs_efd_item_ops = { 474static struct xfs_item_ops xfs_efd_item_ops = {
514 .iop_size = (uint(*)(xfs_log_item_t*))xfs_efd_item_size, 475 .iop_size = xfs_efd_item_size,
515 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 476 .iop_format = xfs_efd_item_format,
516 xfs_efd_item_format, 477 .iop_pin = xfs_efd_item_pin,
517 .iop_pin = (void(*)(xfs_log_item_t*))xfs_efd_item_pin, 478 .iop_unpin = xfs_efd_item_unpin,
518 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_efd_item_unpin, 479 .iop_trylock = xfs_efd_item_trylock,
519 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*)) 480 .iop_unlock = xfs_efd_item_unlock,
520 xfs_efd_item_unpin_remove, 481 .iop_committed = xfs_efd_item_committed,
521 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_efd_item_trylock, 482 .iop_push = xfs_efd_item_push,
522 .iop_unlock = (void(*)(xfs_log_item_t*))xfs_efd_item_unlock, 483 .iop_committing = xfs_efd_item_committing
523 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
524 xfs_efd_item_committed,
525 .iop_push = (void(*)(xfs_log_item_t*))xfs_efd_item_push,
526 .iop_pushbuf = NULL,
527 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
528 xfs_efd_item_committing
529}; 484};
530 485
531
532/* 486/*
533 * Allocate and initialize an efd item with the given number of extents. 487 * Allocate and initialize an efd item with the given number of extents.
534 */ 488 */
535xfs_efd_log_item_t * 489struct xfs_efd_log_item *
536xfs_efd_init(xfs_mount_t *mp, 490xfs_efd_init(
537 xfs_efi_log_item_t *efip, 491 struct xfs_mount *mp,
538 uint nextents) 492 struct xfs_efi_log_item *efip,
493 uint nextents)
539 494
540{ 495{
541 xfs_efd_log_item_t *efdp; 496 struct xfs_efd_log_item *efdp;
542 uint size; 497 uint size;
543 498
544 ASSERT(nextents > 0); 499 ASSERT(nextents > 0);
545 if (nextents > XFS_EFD_MAX_FAST_EXTENTS) { 500 if (nextents > XFS_EFD_MAX_FAST_EXTENTS) {
546 size = (uint)(sizeof(xfs_efd_log_item_t) + 501 size = (uint)(sizeof(xfs_efd_log_item_t) +
547 ((nextents - 1) * sizeof(xfs_extent_t))); 502 ((nextents - 1) * sizeof(xfs_extent_t)));
548 efdp = (xfs_efd_log_item_t*)kmem_zalloc(size, KM_SLEEP); 503 efdp = kmem_zalloc(size, KM_SLEEP);
549 } else { 504 } else {
550 efdp = (xfs_efd_log_item_t*)kmem_zone_zalloc(xfs_efd_zone, 505 efdp = kmem_zone_zalloc(xfs_efd_zone, KM_SLEEP);
551 KM_SLEEP);
552 } 506 }
553 507
554 xfs_log_item_init(mp, &efdp->efd_item, XFS_LI_EFD, &xfs_efd_item_ops); 508 xfs_log_item_init(mp, &efdp->efd_item, XFS_LI_EFD, &xfs_efd_item_ops);
@@ -556,5 +510,5 @@ xfs_efd_init(xfs_mount_t *mp,
556 efdp->efd_format.efd_nextents = nextents; 510 efdp->efd_format.efd_nextents = nextents;
557 efdp->efd_format.efd_efi_id = efip->efi_format.efi_id; 511 efdp->efd_format.efd_efi_id = efip->efi_format.efi_id;
558 512
559 return (efdp); 513 return efdp;
560} 514}
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 390850ee6603..9b715dce5699 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -18,13 +18,9 @@
18#include "xfs.h" 18#include "xfs.h"
19#include "xfs_bmap_btree.h" 19#include "xfs_bmap_btree.h"
20#include "xfs_inum.h" 20#include "xfs_inum.h"
21#include "xfs_dir2.h"
22#include "xfs_dir2_sf.h"
23#include "xfs_attr_sf.h"
24#include "xfs_dinode.h" 21#include "xfs_dinode.h"
25#include "xfs_inode.h" 22#include "xfs_inode.h"
26#include "xfs_ag.h" 23#include "xfs_ag.h"
27#include "xfs_dmapi.h"
28#include "xfs_log.h" 24#include "xfs_log.h"
29#include "xfs_trans.h" 25#include "xfs_trans.h"
30#include "xfs_sb.h" 26#include "xfs_sb.h"
@@ -127,6 +123,82 @@ typedef struct fstrm_item
127 xfs_inode_t *pip; /* Parent directory inode pointer. */ 123 xfs_inode_t *pip; /* Parent directory inode pointer. */
128} fstrm_item_t; 124} fstrm_item_t;
129 125
126/*
127 * Allocation group filestream associations are tracked with per-ag atomic
128 * counters. These counters allow _xfs_filestream_pick_ag() to tell whether a
129 * particular AG already has active filestreams associated with it. The mount
130 * point's m_peraglock is used to protect these counters from per-ag array
131 * re-allocation during a growfs operation. When xfs_growfs_data_private() is
132 * about to reallocate the array, it calls xfs_filestream_flush() with the
133 * m_peraglock held in write mode.
134 *
135 * Since xfs_mru_cache_flush() guarantees that all the free functions for all
136 * the cache elements have finished executing before it returns, it's safe for
137 * the free functions to use the atomic counters without m_peraglock protection.
138 * This allows the implementation of xfs_fstrm_free_func() to be agnostic about
139 * whether it was called with the m_peraglock held in read mode, write mode or
140 * not held at all. The race condition this addresses is the following:
141 *
142 * - The work queue scheduler fires and pulls a filestream directory cache
143 * element off the LRU end of the cache for deletion, then gets pre-empted.
144 * - A growfs operation grabs the m_peraglock in write mode, flushes all the
145 * remaining items from the cache and reallocates the mount point's per-ag
146 * array, resetting all the counters to zero.
147 * - The work queue thread resumes and calls the free function for the element
148 * it started cleaning up earlier. In the process it decrements the
149 * filestreams counter for an AG that now has no references.
150 *
151 * With a shrinkfs feature, the above scenario could panic the system.
152 *
153 * All other uses of the following macros should be protected by either the
154 * m_peraglock held in read mode, or the cache's internal locking exposed by the
155 * interval between a call to xfs_mru_cache_lookup() and a call to
156 * xfs_mru_cache_done(). In addition, the m_peraglock must be held in read mode
157 * when new elements are added to the cache.
158 *
159 * Combined, these locking rules ensure that no associations will ever exist in
160 * the cache that reference per-ag array elements that have since been
161 * reallocated.
162 */
163static int
164xfs_filestream_peek_ag(
165 xfs_mount_t *mp,
166 xfs_agnumber_t agno)
167{
168 struct xfs_perag *pag;
169 int ret;
170
171 pag = xfs_perag_get(mp, agno);
172 ret = atomic_read(&pag->pagf_fstrms);
173 xfs_perag_put(pag);
174 return ret;
175}
176
177static int
178xfs_filestream_get_ag(
179 xfs_mount_t *mp,
180 xfs_agnumber_t agno)
181{
182 struct xfs_perag *pag;
183 int ret;
184
185 pag = xfs_perag_get(mp, agno);
186 ret = atomic_inc_return(&pag->pagf_fstrms);
187 xfs_perag_put(pag);
188 return ret;
189}
190
191static void
192xfs_filestream_put_ag(
193 xfs_mount_t *mp,
194 xfs_agnumber_t agno)
195{
196 struct xfs_perag *pag;
197
198 pag = xfs_perag_get(mp, agno);
199 atomic_dec(&pag->pagf_fstrms);
200 xfs_perag_put(pag);
201}
130 202
131/* 203/*
132 * Scan the AGs starting at startag looking for an AG that isn't in use and has 204 * Scan the AGs starting at startag looking for an AG that isn't in use and has
@@ -355,16 +427,14 @@ xfs_fstrm_free_func(
355{ 427{
356 fstrm_item_t *item = (fstrm_item_t *)data; 428 fstrm_item_t *item = (fstrm_item_t *)data;
357 xfs_inode_t *ip = item->ip; 429 xfs_inode_t *ip = item->ip;
358 int ref;
359 430
360 ASSERT(ip->i_ino == ino); 431 ASSERT(ip->i_ino == ino);
361 432
362 xfs_iflags_clear(ip, XFS_IFILESTREAM); 433 xfs_iflags_clear(ip, XFS_IFILESTREAM);
363 434
364 /* Drop the reference taken on the AG when the item was added. */ 435 /* Drop the reference taken on the AG when the item was added. */
365 ref = xfs_filestream_put_ag(ip->i_mount, item->ag); 436 xfs_filestream_put_ag(ip->i_mount, item->ag);
366 437
367 ASSERT(ref >= 0);
368 TRACE_FREE(ip->i_mount, ip, item->pip, item->ag, 438 TRACE_FREE(ip->i_mount, ip, item->pip, item->ag,
369 xfs_filestream_peek_ag(ip->i_mount, item->ag)); 439 xfs_filestream_peek_ag(ip->i_mount, item->ag));
370 440
diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h
index 260f757bbc5d..09dd9af45434 100644
--- a/fs/xfs/xfs_filestream.h
+++ b/fs/xfs/xfs_filestream.h
@@ -42,88 +42,6 @@ extern ktrace_t *xfs_filestreams_trace_buf;
42 42
43#endif 43#endif
44 44
45/*
46 * Allocation group filestream associations are tracked with per-ag atomic
47 * counters. These counters allow _xfs_filestream_pick_ag() to tell whether a
48 * particular AG already has active filestreams associated with it. The mount
49 * point's m_peraglock is used to protect these counters from per-ag array
50 * re-allocation during a growfs operation. When xfs_growfs_data_private() is
51 * about to reallocate the array, it calls xfs_filestream_flush() with the
52 * m_peraglock held in write mode.
53 *
54 * Since xfs_mru_cache_flush() guarantees that all the free functions for all
55 * the cache elements have finished executing before it returns, it's safe for
56 * the free functions to use the atomic counters without m_peraglock protection.
57 * This allows the implementation of xfs_fstrm_free_func() to be agnostic about
58 * whether it was called with the m_peraglock held in read mode, write mode or
59 * not held at all. The race condition this addresses is the following:
60 *
61 * - The work queue scheduler fires and pulls a filestream directory cache
62 * element off the LRU end of the cache for deletion, then gets pre-empted.
63 * - A growfs operation grabs the m_peraglock in write mode, flushes all the
64 * remaining items from the cache and reallocates the mount point's per-ag
65 * array, resetting all the counters to zero.
66 * - The work queue thread resumes and calls the free function for the element
67 * it started cleaning up earlier. In the process it decrements the
68 * filestreams counter for an AG that now has no references.
69 *
70 * With a shrinkfs feature, the above scenario could panic the system.
71 *
72 * All other uses of the following macros should be protected by either the
73 * m_peraglock held in read mode, or the cache's internal locking exposed by the
74 * interval between a call to xfs_mru_cache_lookup() and a call to
75 * xfs_mru_cache_done(). In addition, the m_peraglock must be held in read mode
76 * when new elements are added to the cache.
77 *
78 * Combined, these locking rules ensure that no associations will ever exist in
79 * the cache that reference per-ag array elements that have since been
80 * reallocated.
81 */
82/*
83 * xfs_filestream_peek_ag is only used in tracing code
84 */
85static inline int
86xfs_filestream_peek_ag(
87 xfs_mount_t *mp,
88 xfs_agnumber_t agno)
89{
90 struct xfs_perag *pag;
91 int ret;
92
93 pag = xfs_perag_get(mp, agno);
94 ret = atomic_read(&pag->pagf_fstrms);
95 xfs_perag_put(pag);
96 return ret;
97}
98
99static inline int
100xfs_filestream_get_ag(
101 xfs_mount_t *mp,
102 xfs_agnumber_t agno)
103{
104 struct xfs_perag *pag;
105 int ret;
106
107 pag = xfs_perag_get(mp, agno);
108 ret = atomic_inc_return(&pag->pagf_fstrms);
109 xfs_perag_put(pag);
110 return ret;
111}
112
113static inline int
114xfs_filestream_put_ag(
115 xfs_mount_t *mp,
116 xfs_agnumber_t agno)
117{
118 struct xfs_perag *pag;
119 int ret;
120
121 pag = xfs_perag_get(mp, agno);
122 ret = atomic_dec_return(&pag->pagf_fstrms);
123 xfs_perag_put(pag);
124 return ret;
125}
126
127/* allocation selection flags */ 45/* allocation selection flags */
128typedef enum xfs_fstrm_alloc { 46typedef enum xfs_fstrm_alloc {
129 XFS_PICK_USERDATA = 1, 47 XFS_PICK_USERDATA = 1,
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 37a6f62c57b6..dbca5f5c37ba 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -24,14 +24,10 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_inode_item.h" 33#include "xfs_inode_item.h"
@@ -626,8 +622,7 @@ xfs_fs_log_dummy(
626 ip = mp->m_rootip; 622 ip = mp->m_rootip;
627 xfs_ilock(ip, XFS_ILOCK_EXCL); 623 xfs_ilock(ip, XFS_ILOCK_EXCL);
628 624
629 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 625 xfs_trans_ijoin(tp, ip);
630 xfs_trans_ihold(tp, ip);
631 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 626 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
632 xfs_trans_set_sync(tp); 627 xfs_trans_set_sync(tp);
633 error = xfs_trans_commit(tp, 0); 628 error = xfs_trans_commit(tp, 0);
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index c7142a064c48..abf80ae1e95b 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -24,14 +24,10 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_btree.h" 33#include "xfs_btree.h"
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index c282a9af5393..d352862cefa0 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -24,14 +24,10 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_btree.h" 33#include "xfs_btree.h"
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 8f8b91be2c99..b1ecc6f97ade 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -25,14 +25,10 @@
25#include "xfs_trans.h" 25#include "xfs_trans.h"
26#include "xfs_sb.h" 26#include "xfs_sb.h"
27#include "xfs_ag.h" 27#include "xfs_ag.h"
28#include "xfs_dir2.h"
29#include "xfs_dmapi.h"
30#include "xfs_mount.h" 28#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 30#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 31#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h" 32#include "xfs_dinode.h"
37#include "xfs_inode.h" 33#include "xfs_inode.h"
38#include "xfs_btree.h" 34#include "xfs_btree.h"
@@ -95,7 +91,7 @@ xfs_inode_alloc(
95 return ip; 91 return ip;
96} 92}
97 93
98STATIC void 94void
99xfs_inode_free( 95xfs_inode_free(
100 struct xfs_inode *ip) 96 struct xfs_inode *ip)
101{ 97{
@@ -212,7 +208,7 @@ xfs_iget_cache_hit(
212 ip->i_flags &= ~XFS_INEW; 208 ip->i_flags &= ~XFS_INEW;
213 ip->i_flags |= XFS_IRECLAIMABLE; 209 ip->i_flags |= XFS_IRECLAIMABLE;
214 __xfs_inode_set_reclaim_tag(pag, ip); 210 __xfs_inode_set_reclaim_tag(pag, ip);
215 trace_xfs_iget_reclaim(ip); 211 trace_xfs_iget_reclaim_fail(ip);
216 goto out_error; 212 goto out_error;
217 } 213 }
218 214
@@ -227,6 +223,7 @@ xfs_iget_cache_hit(
227 } else { 223 } else {
228 /* If the VFS inode is being torn down, pause and try again. */ 224 /* If the VFS inode is being torn down, pause and try again. */
229 if (!igrab(inode)) { 225 if (!igrab(inode)) {
226 trace_xfs_iget_skip(ip);
230 error = EAGAIN; 227 error = EAGAIN;
231 goto out_error; 228 goto out_error;
232 } 229 }
@@ -234,6 +231,7 @@ xfs_iget_cache_hit(
234 /* We've got a live one. */ 231 /* We've got a live one. */
235 spin_unlock(&ip->i_flags_lock); 232 spin_unlock(&ip->i_flags_lock);
236 read_unlock(&pag->pag_ici_lock); 233 read_unlock(&pag->pag_ici_lock);
234 trace_xfs_iget_hit(ip);
237 } 235 }
238 236
239 if (lock_flags != 0) 237 if (lock_flags != 0)
@@ -242,7 +240,6 @@ xfs_iget_cache_hit(
242 xfs_iflags_clear(ip, XFS_ISTALE); 240 xfs_iflags_clear(ip, XFS_ISTALE);
243 XFS_STATS_INC(xs_ig_found); 241 XFS_STATS_INC(xs_ig_found);
244 242
245 trace_xfs_iget_found(ip);
246 return 0; 243 return 0;
247 244
248out_error: 245out_error:
@@ -264,7 +261,6 @@ xfs_iget_cache_miss(
264{ 261{
265 struct xfs_inode *ip; 262 struct xfs_inode *ip;
266 int error; 263 int error;
267 unsigned long first_index, mask;
268 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino); 264 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino);
269 265
270 ip = xfs_inode_alloc(mp, ino); 266 ip = xfs_inode_alloc(mp, ino);
@@ -275,7 +271,7 @@ xfs_iget_cache_miss(
275 if (error) 271 if (error)
276 goto out_destroy; 272 goto out_destroy;
277 273
278 xfs_itrace_entry(ip); 274 trace_xfs_iget_miss(ip);
279 275
280 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { 276 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
281 error = ENOENT; 277 error = ENOENT;
@@ -301,8 +297,6 @@ xfs_iget_cache_miss(
301 BUG(); 297 BUG();
302 } 298 }
303 299
304 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
305 first_index = agino & mask;
306 write_lock(&pag->pag_ici_lock); 300 write_lock(&pag->pag_ici_lock);
307 301
308 /* insert the new inode */ 302 /* insert the new inode */
@@ -321,7 +315,6 @@ xfs_iget_cache_miss(
321 write_unlock(&pag->pag_ici_lock); 315 write_unlock(&pag->pag_ici_lock);
322 radix_tree_preload_end(); 316 radix_tree_preload_end();
323 317
324 trace_xfs_iget_alloc(ip);
325 *ipp = ip; 318 *ipp = ip;
326 return 0; 319 return 0;
327 320
@@ -422,97 +415,6 @@ out_error_or_again:
422} 415}
423 416
424/* 417/*
425 * Decrement reference count of an inode structure and unlock it.
426 *
427 * ip -- the inode being released
428 * lock_flags -- this parameter indicates the inode's locks to be
429 * to be released. See the comment on xfs_iunlock() for a list
430 * of valid values.
431 */
432void
433xfs_iput(xfs_inode_t *ip,
434 uint lock_flags)
435{
436 xfs_itrace_entry(ip);
437 xfs_iunlock(ip, lock_flags);
438 IRELE(ip);
439}
440
441/*
442 * Special iput for brand-new inodes that are still locked
443 */
444void
445xfs_iput_new(
446 xfs_inode_t *ip,
447 uint lock_flags)
448{
449 struct inode *inode = VFS_I(ip);
450
451 xfs_itrace_entry(ip);
452
453 if ((ip->i_d.di_mode == 0)) {
454 ASSERT(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
455 make_bad_inode(inode);
456 }
457 if (inode->i_state & I_NEW)
458 unlock_new_inode(inode);
459 if (lock_flags)
460 xfs_iunlock(ip, lock_flags);
461 IRELE(ip);
462}
463
464/*
465 * This is called free all the memory associated with an inode.
466 * It must free the inode itself and any buffers allocated for
467 * if_extents/if_data and if_broot. It must also free the lock
468 * associated with the inode.
469 *
470 * Note: because we don't initialise everything on reallocation out
471 * of the zone, we must ensure we nullify everything correctly before
472 * freeing the structure.
473 */
474void
475xfs_ireclaim(
476 struct xfs_inode *ip)
477{
478 struct xfs_mount *mp = ip->i_mount;
479 struct xfs_perag *pag;
480 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
481
482 XFS_STATS_INC(xs_ig_reclaims);
483
484 /*
485 * Remove the inode from the per-AG radix tree.
486 *
487 * Because radix_tree_delete won't complain even if the item was never
488 * added to the tree assert that it's been there before to catch
489 * problems with the inode life time early on.
490 */
491 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
492 write_lock(&pag->pag_ici_lock);
493 if (!radix_tree_delete(&pag->pag_ici_root, agino))
494 ASSERT(0);
495 write_unlock(&pag->pag_ici_lock);
496 xfs_perag_put(pag);
497
498 /*
499 * Here we do an (almost) spurious inode lock in order to coordinate
500 * with inode cache radix tree lookups. This is because the lookup
501 * can reference the inodes in the cache without taking references.
502 *
503 * We make that OK here by ensuring that we wait until the inode is
504 * unlocked after the lookup before we go ahead and free it. We get
505 * both the ilock and the iolock because the code may need to drop the
506 * ilock one but will still hold the iolock.
507 */
508 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
509 xfs_qm_dqdetach(ip);
510 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
511
512 xfs_inode_free(ip);
513}
514
515/*
516 * This is a wrapper routine around the xfs_ilock() routine 418 * This is a wrapper routine around the xfs_ilock() routine
517 * used to centralize some grungy code. It is used in places 419 * used to centralize some grungy code. It is used in places
518 * that wish to lock the inode solely for reading the extents. 420 * that wish to lock the inode solely for reading the extents.
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index b76a829d7e20..68415cb4f23c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -27,13 +27,10 @@
27#include "xfs_trans_priv.h" 27#include "xfs_trans_priv.h"
28#include "xfs_sb.h" 28#include "xfs_sb.h"
29#include "xfs_ag.h" 29#include "xfs_ag.h"
30#include "xfs_dir2.h"
31#include "xfs_dmapi.h"
32#include "xfs_mount.h" 30#include "xfs_mount.h"
33#include "xfs_bmap_btree.h" 31#include "xfs_bmap_btree.h"
34#include "xfs_alloc_btree.h" 32#include "xfs_alloc_btree.h"
35#include "xfs_ialloc_btree.h" 33#include "xfs_ialloc_btree.h"
36#include "xfs_dir2_sf.h"
37#include "xfs_attr_sf.h" 34#include "xfs_attr_sf.h"
38#include "xfs_dinode.h" 35#include "xfs_dinode.h"
39#include "xfs_inode.h" 36#include "xfs_inode.h"
@@ -44,7 +41,6 @@
44#include "xfs_alloc.h" 41#include "xfs_alloc.h"
45#include "xfs_ialloc.h" 42#include "xfs_ialloc.h"
46#include "xfs_bmap.h" 43#include "xfs_bmap.h"
47#include "xfs_rw.h"
48#include "xfs_error.h" 44#include "xfs_error.h"
49#include "xfs_utils.h" 45#include "xfs_utils.h"
50#include "xfs_quota.h" 46#include "xfs_quota.h"
@@ -426,7 +422,7 @@ xfs_iformat(
426 if (!XFS_DFORK_Q(dip)) 422 if (!XFS_DFORK_Q(dip))
427 return 0; 423 return 0;
428 ASSERT(ip->i_afp == NULL); 424 ASSERT(ip->i_afp == NULL);
429 ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); 425 ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);
430 ip->i_afp->if_ext_max = 426 ip->i_afp->if_ext_max =
431 XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 427 XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
432 switch (dip->di_aformat) { 428 switch (dip->di_aformat) {
@@ -509,7 +505,7 @@ xfs_iformat_local(
509 ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 505 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
510 else { 506 else {
511 real_size = roundup(size, 4); 507 real_size = roundup(size, 4);
512 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); 508 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
513 } 509 }
514 ifp->if_bytes = size; 510 ifp->if_bytes = size;
515 ifp->if_real_bytes = real_size; 511 ifp->if_real_bytes = real_size;
@@ -636,7 +632,7 @@ xfs_iformat_btree(
636 } 632 }
637 633
638 ifp->if_broot_bytes = size; 634 ifp->if_broot_bytes = size;
639 ifp->if_broot = kmem_alloc(size, KM_SLEEP); 635 ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS);
640 ASSERT(ifp->if_broot != NULL); 636 ASSERT(ifp->if_broot != NULL);
641 /* 637 /*
642 * Copy and convert from the on-disk structure 638 * Copy and convert from the on-disk structure
@@ -922,7 +918,6 @@ xfs_iread_extents(
922 int error; 918 int error;
923 xfs_ifork_t *ifp; 919 xfs_ifork_t *ifp;
924 xfs_extnum_t nextents; 920 xfs_extnum_t nextents;
925 size_t size;
926 921
927 if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { 922 if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
928 XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW, 923 XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,
@@ -930,7 +925,6 @@ xfs_iread_extents(
930 return XFS_ERROR(EFSCORRUPTED); 925 return XFS_ERROR(EFSCORRUPTED);
931 } 926 }
932 nextents = XFS_IFORK_NEXTENTS(ip, whichfork); 927 nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
933 size = nextents * sizeof(xfs_bmbt_rec_t);
934 ifp = XFS_IFORK_PTR(ip, whichfork); 928 ifp = XFS_IFORK_PTR(ip, whichfork);
935 929
936 /* 930 /*
@@ -1226,7 +1220,7 @@ xfs_isize_check(
1226 (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) - 1220 (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) -
1227 map_first), 1221 map_first),
1228 XFS_BMAPI_ENTIRE, NULL, 0, imaps, &nimaps, 1222 XFS_BMAPI_ENTIRE, NULL, 0, imaps, &nimaps,
1229 NULL, NULL)) 1223 NULL))
1230 return; 1224 return;
1231 ASSERT(nimaps == 1); 1225 ASSERT(nimaps == 1);
1232 ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK); 1226 ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK);
@@ -1460,7 +1454,7 @@ xfs_itruncate_finish(
1460 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); 1454 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
1461 ASSERT(ip->i_transp == *tp); 1455 ASSERT(ip->i_transp == *tp);
1462 ASSERT(ip->i_itemp != NULL); 1456 ASSERT(ip->i_itemp != NULL);
1463 ASSERT(ip->i_itemp->ili_flags & XFS_ILI_HOLD); 1457 ASSERT(ip->i_itemp->ili_lock_flags == 0);
1464 1458
1465 1459
1466 ntp = *tp; 1460 ntp = *tp;
@@ -1589,11 +1583,10 @@ xfs_itruncate_finish(
1589 xfs_bmap_init(&free_list, &first_block); 1583 xfs_bmap_init(&free_list, &first_block);
1590 error = xfs_bunmapi(ntp, ip, 1584 error = xfs_bunmapi(ntp, ip,
1591 first_unmap_block, unmap_len, 1585 first_unmap_block, unmap_len,
1592 xfs_bmapi_aflag(fork) | 1586 xfs_bmapi_aflag(fork),
1593 (sync ? 0 : XFS_BMAPI_ASYNC),
1594 XFS_ITRUNC_MAX_EXTENTS, 1587 XFS_ITRUNC_MAX_EXTENTS,
1595 &first_block, &free_list, 1588 &first_block, &free_list,
1596 NULL, &done); 1589 &done);
1597 if (error) { 1590 if (error) {
1598 /* 1591 /*
1599 * If the bunmapi call encounters an error, 1592 * If the bunmapi call encounters an error,
@@ -1612,12 +1605,8 @@ xfs_itruncate_finish(
1612 */ 1605 */
1613 error = xfs_bmap_finish(tp, &free_list, &committed); 1606 error = xfs_bmap_finish(tp, &free_list, &committed);
1614 ntp = *tp; 1607 ntp = *tp;
1615 if (committed) { 1608 if (committed)
1616 /* link the inode into the next xact in the chain */ 1609 xfs_trans_ijoin(ntp, ip);
1617 xfs_trans_ijoin(ntp, ip,
1618 XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
1619 xfs_trans_ihold(ntp, ip);
1620 }
1621 1610
1622 if (error) { 1611 if (error) {
1623 /* 1612 /*
@@ -1646,9 +1635,7 @@ xfs_itruncate_finish(
1646 error = xfs_trans_commit(*tp, 0); 1635 error = xfs_trans_commit(*tp, 0);
1647 *tp = ntp; 1636 *tp = ntp;
1648 1637
1649 /* link the inode into the next transaction in the chain */ 1638 xfs_trans_ijoin(ntp, ip);
1650 xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
1651 xfs_trans_ihold(ntp, ip);
1652 1639
1653 if (error) 1640 if (error)
1654 return error; 1641 return error;
@@ -1985,7 +1972,7 @@ xfs_ifree_cluster(
1985 if (lip->li_type == XFS_LI_INODE) { 1972 if (lip->li_type == XFS_LI_INODE) {
1986 iip = (xfs_inode_log_item_t *)lip; 1973 iip = (xfs_inode_log_item_t *)lip;
1987 ASSERT(iip->ili_logged == 1); 1974 ASSERT(iip->ili_logged == 1);
1988 lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done; 1975 lip->li_cb = xfs_istale_done;
1989 xfs_trans_ail_copy_lsn(mp->m_ail, 1976 xfs_trans_ail_copy_lsn(mp->m_ail,
1990 &iip->ili_flush_lsn, 1977 &iip->ili_flush_lsn,
1991 &iip->ili_item.li_lsn); 1978 &iip->ili_item.li_lsn);
@@ -2055,9 +2042,8 @@ xfs_ifree_cluster(
2055 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 2042 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
2056 &iip->ili_item.li_lsn); 2043 &iip->ili_item.li_lsn);
2057 2044
2058 xfs_buf_attach_iodone(bp, 2045 xfs_buf_attach_iodone(bp, xfs_istale_done,
2059 (void(*)(xfs_buf_t*,xfs_log_item_t*)) 2046 &iip->ili_item);
2060 xfs_istale_done, (xfs_log_item_t *)iip);
2061 2047
2062 if (ip != free_ip) 2048 if (ip != free_ip)
2063 xfs_iunlock(ip, XFS_ILOCK_EXCL); 2049 xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -2203,7 +2189,7 @@ xfs_iroot_realloc(
2203 */ 2189 */
2204 if (ifp->if_broot_bytes == 0) { 2190 if (ifp->if_broot_bytes == 0) {
2205 new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff); 2191 new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff);
2206 ifp->if_broot = kmem_alloc(new_size, KM_SLEEP); 2192 ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
2207 ifp->if_broot_bytes = (int)new_size; 2193 ifp->if_broot_bytes = (int)new_size;
2208 return; 2194 return;
2209 } 2195 }
@@ -2219,7 +2205,7 @@ xfs_iroot_realloc(
2219 new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); 2205 new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
2220 ifp->if_broot = kmem_realloc(ifp->if_broot, new_size, 2206 ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
2221 (size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */ 2207 (size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */
2222 KM_SLEEP); 2208 KM_SLEEP | KM_NOFS);
2223 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, 2209 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
2224 ifp->if_broot_bytes); 2210 ifp->if_broot_bytes);
2225 np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, 2211 np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
@@ -2245,7 +2231,7 @@ xfs_iroot_realloc(
2245 else 2231 else
2246 new_size = 0; 2232 new_size = 0;
2247 if (new_size > 0) { 2233 if (new_size > 0) {
2248 new_broot = kmem_alloc(new_size, KM_SLEEP); 2234 new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
2249 /* 2235 /*
2250 * First copy over the btree block header. 2236 * First copy over the btree block header.
2251 */ 2237 */
@@ -2349,7 +2335,8 @@ xfs_idata_realloc(
2349 real_size = roundup(new_size, 4); 2335 real_size = roundup(new_size, 4);
2350 if (ifp->if_u1.if_data == NULL) { 2336 if (ifp->if_u1.if_data == NULL) {
2351 ASSERT(ifp->if_real_bytes == 0); 2337 ASSERT(ifp->if_real_bytes == 0);
2352 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); 2338 ifp->if_u1.if_data = kmem_alloc(real_size,
2339 KM_SLEEP | KM_NOFS);
2353 } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 2340 } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
2354 /* 2341 /*
2355 * Only do the realloc if the underlying size 2342 * Only do the realloc if the underlying size
@@ -2360,11 +2347,12 @@ xfs_idata_realloc(
2360 kmem_realloc(ifp->if_u1.if_data, 2347 kmem_realloc(ifp->if_u1.if_data,
2361 real_size, 2348 real_size,
2362 ifp->if_real_bytes, 2349 ifp->if_real_bytes,
2363 KM_SLEEP); 2350 KM_SLEEP | KM_NOFS);
2364 } 2351 }
2365 } else { 2352 } else {
2366 ASSERT(ifp->if_real_bytes == 0); 2353 ASSERT(ifp->if_real_bytes == 0);
2367 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); 2354 ifp->if_u1.if_data = kmem_alloc(real_size,
2355 KM_SLEEP | KM_NOFS);
2368 memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data, 2356 memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
2369 ifp->if_bytes); 2357 ifp->if_bytes);
2370 } 2358 }
@@ -2731,7 +2719,6 @@ cluster_corrupt_out:
2731 * mark it as stale and brelse. 2719 * mark it as stale and brelse.
2732 */ 2720 */
2733 if (XFS_BUF_IODONE_FUNC(bp)) { 2721 if (XFS_BUF_IODONE_FUNC(bp)) {
2734 XFS_BUF_CLR_BDSTRAT_FUNC(bp);
2735 XFS_BUF_UNDONE(bp); 2722 XFS_BUF_UNDONE(bp);
2736 XFS_BUF_STALE(bp); 2723 XFS_BUF_STALE(bp);
2737 XFS_BUF_ERROR(bp,EIO); 2724 XFS_BUF_ERROR(bp,EIO);
@@ -3069,8 +3056,7 @@ xfs_iflush_int(
3069 * and unlock the inode's flush lock when the inode is 3056 * and unlock the inode's flush lock when the inode is
3070 * completely written to disk. 3057 * completely written to disk.
3071 */ 3058 */
3072 xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t*,xfs_log_item_t*)) 3059 xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
3073 xfs_iflush_done, (xfs_log_item_t *)iip);
3074 3060
3075 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 3061 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
3076 ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL); 3062 ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL);
@@ -3514,13 +3500,11 @@ xfs_iext_remove_indirect(
3514 xfs_extnum_t ext_diff; /* extents to remove in current list */ 3500 xfs_extnum_t ext_diff; /* extents to remove in current list */
3515 xfs_extnum_t nex1; /* number of extents before idx */ 3501 xfs_extnum_t nex1; /* number of extents before idx */
3516 xfs_extnum_t nex2; /* extents after idx + count */ 3502 xfs_extnum_t nex2; /* extents after idx + count */
3517 int nlists; /* entries in indirection array */
3518 int page_idx = idx; /* index in target extent list */ 3503 int page_idx = idx; /* index in target extent list */
3519 3504
3520 ASSERT(ifp->if_flags & XFS_IFEXTIREC); 3505 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
3521 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); 3506 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
3522 ASSERT(erp != NULL); 3507 ASSERT(erp != NULL);
3523 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
3524 nex1 = page_idx; 3508 nex1 = page_idx;
3525 ext_cnt = count; 3509 ext_cnt = count;
3526 while (ext_cnt) { 3510 while (ext_cnt) {
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 78550df13cd6..0898c5417d12 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -443,8 +443,6 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
443 */ 443 */
444int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, 444int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
445 uint, uint, xfs_inode_t **); 445 uint, uint, xfs_inode_t **);
446void xfs_iput(xfs_inode_t *, uint);
447void xfs_iput_new(xfs_inode_t *, uint);
448void xfs_ilock(xfs_inode_t *, uint); 446void xfs_ilock(xfs_inode_t *, uint);
449int xfs_ilock_nowait(xfs_inode_t *, uint); 447int xfs_ilock_nowait(xfs_inode_t *, uint);
450void xfs_iunlock(xfs_inode_t *, uint); 448void xfs_iunlock(xfs_inode_t *, uint);
@@ -452,7 +450,7 @@ void xfs_ilock_demote(xfs_inode_t *, uint);
452int xfs_isilocked(xfs_inode_t *, uint); 450int xfs_isilocked(xfs_inode_t *, uint);
453uint xfs_ilock_map_shared(xfs_inode_t *); 451uint xfs_ilock_map_shared(xfs_inode_t *);
454void xfs_iunlock_map_shared(xfs_inode_t *, uint); 452void xfs_iunlock_map_shared(xfs_inode_t *, uint);
455void xfs_ireclaim(xfs_inode_t *); 453void xfs_inode_free(struct xfs_inode *ip);
456 454
457/* 455/*
458 * xfs_inode.c prototypes. 456 * xfs_inode.c prototypes.
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index cf8249a60004..fe00777e2796 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -22,30 +22,26 @@
22#include "xfs_log.h" 22#include "xfs_log.h"
23#include "xfs_inum.h" 23#include "xfs_inum.h"
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_buf_item.h"
26#include "xfs_sb.h" 25#include "xfs_sb.h"
27#include "xfs_ag.h" 26#include "xfs_ag.h"
28#include "xfs_dir2.h"
29#include "xfs_dmapi.h"
30#include "xfs_mount.h" 27#include "xfs_mount.h"
31#include "xfs_trans_priv.h" 28#include "xfs_trans_priv.h"
32#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
33#include "xfs_alloc_btree.h"
34#include "xfs_ialloc_btree.h"
35#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 30#include "xfs_dinode.h"
38#include "xfs_inode.h" 31#include "xfs_inode.h"
39#include "xfs_inode_item.h" 32#include "xfs_inode_item.h"
40#include "xfs_btree.h"
41#include "xfs_ialloc.h"
42#include "xfs_rw.h"
43#include "xfs_error.h" 33#include "xfs_error.h"
44#include "xfs_trace.h" 34#include "xfs_trace.h"
45 35
46 36
47kmem_zone_t *xfs_ili_zone; /* inode log item zone */ 37kmem_zone_t *xfs_ili_zone; /* inode log item zone */
48 38
39static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip)
40{
41 return container_of(lip, struct xfs_inode_log_item, ili_item);
42}
43
44
49/* 45/*
50 * This returns the number of iovecs needed to log the given inode item. 46 * This returns the number of iovecs needed to log the given inode item.
51 * 47 *
@@ -55,13 +51,11 @@ kmem_zone_t *xfs_ili_zone; /* inode log item zone */
55 */ 51 */
56STATIC uint 52STATIC uint
57xfs_inode_item_size( 53xfs_inode_item_size(
58 xfs_inode_log_item_t *iip) 54 struct xfs_log_item *lip)
59{ 55{
60 uint nvecs; 56 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
61 xfs_inode_t *ip; 57 struct xfs_inode *ip = iip->ili_inode;
62 58 uint nvecs = 2;
63 ip = iip->ili_inode;
64 nvecs = 2;
65 59
66 /* 60 /*
67 * Only log the data/extents/b-tree root if there is something 61 * Only log the data/extents/b-tree root if there is something
@@ -212,21 +206,17 @@ xfs_inode_item_size(
212 */ 206 */
213STATIC void 207STATIC void
214xfs_inode_item_format( 208xfs_inode_item_format(
215 xfs_inode_log_item_t *iip, 209 struct xfs_log_item *lip,
216 xfs_log_iovec_t *log_vector) 210 struct xfs_log_iovec *vecp)
217{ 211{
212 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
213 struct xfs_inode *ip = iip->ili_inode;
218 uint nvecs; 214 uint nvecs;
219 xfs_log_iovec_t *vecp;
220 xfs_inode_t *ip;
221 size_t data_bytes; 215 size_t data_bytes;
222 xfs_bmbt_rec_t *ext_buffer; 216 xfs_bmbt_rec_t *ext_buffer;
223 int nrecs;
224 xfs_mount_t *mp; 217 xfs_mount_t *mp;
225 218
226 ip = iip->ili_inode; 219 vecp->i_addr = &iip->ili_format;
227 vecp = log_vector;
228
229 vecp->i_addr = (xfs_caddr_t)&iip->ili_format;
230 vecp->i_len = sizeof(xfs_inode_log_format_t); 220 vecp->i_len = sizeof(xfs_inode_log_format_t);
231 vecp->i_type = XLOG_REG_TYPE_IFORMAT; 221 vecp->i_type = XLOG_REG_TYPE_IFORMAT;
232 vecp++; 222 vecp++;
@@ -277,7 +267,7 @@ xfs_inode_item_format(
277 */ 267 */
278 xfs_synchronize_times(ip); 268 xfs_synchronize_times(ip);
279 269
280 vecp->i_addr = (xfs_caddr_t)&ip->i_d; 270 vecp->i_addr = &ip->i_d;
281 vecp->i_len = sizeof(struct xfs_icdinode); 271 vecp->i_len = sizeof(struct xfs_icdinode);
282 vecp->i_type = XLOG_REG_TYPE_ICORE; 272 vecp->i_type = XLOG_REG_TYPE_ICORE;
283 vecp++; 273 vecp++;
@@ -323,18 +313,17 @@ xfs_inode_item_format(
323 ASSERT(ip->i_df.if_u1.if_extents != NULL); 313 ASSERT(ip->i_df.if_u1.if_extents != NULL);
324 ASSERT(ip->i_d.di_nextents > 0); 314 ASSERT(ip->i_d.di_nextents > 0);
325 ASSERT(iip->ili_extents_buf == NULL); 315 ASSERT(iip->ili_extents_buf == NULL);
326 nrecs = ip->i_df.if_bytes / 316 ASSERT((ip->i_df.if_bytes /
327 (uint)sizeof(xfs_bmbt_rec_t); 317 (uint)sizeof(xfs_bmbt_rec_t)) > 0);
328 ASSERT(nrecs > 0);
329#ifdef XFS_NATIVE_HOST 318#ifdef XFS_NATIVE_HOST
330 if (nrecs == ip->i_d.di_nextents) { 319 if (ip->i_d.di_nextents == ip->i_df.if_bytes /
320 (uint)sizeof(xfs_bmbt_rec_t)) {
331 /* 321 /*
332 * There are no delayed allocation 322 * There are no delayed allocation
333 * extents, so just point to the 323 * extents, so just point to the
334 * real extents array. 324 * real extents array.
335 */ 325 */
336 vecp->i_addr = 326 vecp->i_addr = ip->i_df.if_u1.if_extents;
337 (char *)(ip->i_df.if_u1.if_extents);
338 vecp->i_len = ip->i_df.if_bytes; 327 vecp->i_len = ip->i_df.if_bytes;
339 vecp->i_type = XLOG_REG_TYPE_IEXT; 328 vecp->i_type = XLOG_REG_TYPE_IEXT;
340 } else 329 } else
@@ -352,7 +341,7 @@ xfs_inode_item_format(
352 ext_buffer = kmem_alloc(ip->i_df.if_bytes, 341 ext_buffer = kmem_alloc(ip->i_df.if_bytes,
353 KM_SLEEP); 342 KM_SLEEP);
354 iip->ili_extents_buf = ext_buffer; 343 iip->ili_extents_buf = ext_buffer;
355 vecp->i_addr = (xfs_caddr_t)ext_buffer; 344 vecp->i_addr = ext_buffer;
356 vecp->i_len = xfs_iextents_copy(ip, ext_buffer, 345 vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
357 XFS_DATA_FORK); 346 XFS_DATA_FORK);
358 vecp->i_type = XLOG_REG_TYPE_IEXT; 347 vecp->i_type = XLOG_REG_TYPE_IEXT;
@@ -371,7 +360,7 @@ xfs_inode_item_format(
371 if (iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) { 360 if (iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) {
372 ASSERT(ip->i_df.if_broot_bytes > 0); 361 ASSERT(ip->i_df.if_broot_bytes > 0);
373 ASSERT(ip->i_df.if_broot != NULL); 362 ASSERT(ip->i_df.if_broot != NULL);
374 vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot; 363 vecp->i_addr = ip->i_df.if_broot;
375 vecp->i_len = ip->i_df.if_broot_bytes; 364 vecp->i_len = ip->i_df.if_broot_bytes;
376 vecp->i_type = XLOG_REG_TYPE_IBROOT; 365 vecp->i_type = XLOG_REG_TYPE_IBROOT;
377 vecp++; 366 vecp++;
@@ -389,7 +378,7 @@ xfs_inode_item_format(
389 ASSERT(ip->i_df.if_u1.if_data != NULL); 378 ASSERT(ip->i_df.if_u1.if_data != NULL);
390 ASSERT(ip->i_d.di_size > 0); 379 ASSERT(ip->i_d.di_size > 0);
391 380
392 vecp->i_addr = (xfs_caddr_t)ip->i_df.if_u1.if_data; 381 vecp->i_addr = ip->i_df.if_u1.if_data;
393 /* 382 /*
394 * Round i_bytes up to a word boundary. 383 * Round i_bytes up to a word boundary.
395 * The underlying memory is guaranteed to 384 * The underlying memory is guaranteed to
@@ -437,7 +426,7 @@ xfs_inode_item_format(
437 * Assert that no attribute-related log flags are set. 426 * Assert that no attribute-related log flags are set.
438 */ 427 */
439 if (!XFS_IFORK_Q(ip)) { 428 if (!XFS_IFORK_Q(ip)) {
440 ASSERT(nvecs == iip->ili_item.li_desc->lid_size); 429 ASSERT(nvecs == lip->li_desc->lid_size);
441 iip->ili_format.ilf_size = nvecs; 430 iip->ili_format.ilf_size = nvecs;
442 ASSERT(!(iip->ili_format.ilf_fields & 431 ASSERT(!(iip->ili_format.ilf_fields &
443 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT))); 432 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT)));
@@ -449,21 +438,21 @@ xfs_inode_item_format(
449 ASSERT(!(iip->ili_format.ilf_fields & 438 ASSERT(!(iip->ili_format.ilf_fields &
450 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT))); 439 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT)));
451 if (iip->ili_format.ilf_fields & XFS_ILOG_AEXT) { 440 if (iip->ili_format.ilf_fields & XFS_ILOG_AEXT) {
452 ASSERT(ip->i_afp->if_bytes > 0);
453 ASSERT(ip->i_afp->if_u1.if_extents != NULL);
454 ASSERT(ip->i_d.di_anextents > 0);
455#ifdef DEBUG 441#ifdef DEBUG
456 nrecs = ip->i_afp->if_bytes / 442 int nrecs = ip->i_afp->if_bytes /
457 (uint)sizeof(xfs_bmbt_rec_t); 443 (uint)sizeof(xfs_bmbt_rec_t);
458#endif
459 ASSERT(nrecs > 0); 444 ASSERT(nrecs > 0);
460 ASSERT(nrecs == ip->i_d.di_anextents); 445 ASSERT(nrecs == ip->i_d.di_anextents);
446 ASSERT(ip->i_afp->if_bytes > 0);
447 ASSERT(ip->i_afp->if_u1.if_extents != NULL);
448 ASSERT(ip->i_d.di_anextents > 0);
449#endif
461#ifdef XFS_NATIVE_HOST 450#ifdef XFS_NATIVE_HOST
462 /* 451 /*
463 * There are not delayed allocation extents 452 * There are not delayed allocation extents
464 * for attributes, so just point at the array. 453 * for attributes, so just point at the array.
465 */ 454 */
466 vecp->i_addr = (char *)(ip->i_afp->if_u1.if_extents); 455 vecp->i_addr = ip->i_afp->if_u1.if_extents;
467 vecp->i_len = ip->i_afp->if_bytes; 456 vecp->i_len = ip->i_afp->if_bytes;
468#else 457#else
469 ASSERT(iip->ili_aextents_buf == NULL); 458 ASSERT(iip->ili_aextents_buf == NULL);
@@ -473,7 +462,7 @@ xfs_inode_item_format(
473 ext_buffer = kmem_alloc(ip->i_afp->if_bytes, 462 ext_buffer = kmem_alloc(ip->i_afp->if_bytes,
474 KM_SLEEP); 463 KM_SLEEP);
475 iip->ili_aextents_buf = ext_buffer; 464 iip->ili_aextents_buf = ext_buffer;
476 vecp->i_addr = (xfs_caddr_t)ext_buffer; 465 vecp->i_addr = ext_buffer;
477 vecp->i_len = xfs_iextents_copy(ip, ext_buffer, 466 vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
478 XFS_ATTR_FORK); 467 XFS_ATTR_FORK);
479#endif 468#endif
@@ -490,7 +479,7 @@ xfs_inode_item_format(
490 if (iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) { 479 if (iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) {
491 ASSERT(ip->i_afp->if_broot_bytes > 0); 480 ASSERT(ip->i_afp->if_broot_bytes > 0);
492 ASSERT(ip->i_afp->if_broot != NULL); 481 ASSERT(ip->i_afp->if_broot != NULL);
493 vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot; 482 vecp->i_addr = ip->i_afp->if_broot;
494 vecp->i_len = ip->i_afp->if_broot_bytes; 483 vecp->i_len = ip->i_afp->if_broot_bytes;
495 vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT; 484 vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT;
496 vecp++; 485 vecp++;
@@ -506,7 +495,7 @@ xfs_inode_item_format(
506 ASSERT(ip->i_afp->if_bytes > 0); 495 ASSERT(ip->i_afp->if_bytes > 0);
507 ASSERT(ip->i_afp->if_u1.if_data != NULL); 496 ASSERT(ip->i_afp->if_u1.if_data != NULL);
508 497
509 vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_u1.if_data; 498 vecp->i_addr = ip->i_afp->if_u1.if_data;
510 /* 499 /*
511 * Round i_bytes up to a word boundary. 500 * Round i_bytes up to a word boundary.
512 * The underlying memory is guaranteed to 501 * The underlying memory is guaranteed to
@@ -528,7 +517,7 @@ xfs_inode_item_format(
528 break; 517 break;
529 } 518 }
530 519
531 ASSERT(nvecs == iip->ili_item.li_desc->lid_size); 520 ASSERT(nvecs == lip->li_desc->lid_size);
532 iip->ili_format.ilf_size = nvecs; 521 iip->ili_format.ilf_size = nvecs;
533} 522}
534 523
@@ -539,12 +528,14 @@ xfs_inode_item_format(
539 */ 528 */
540STATIC void 529STATIC void
541xfs_inode_item_pin( 530xfs_inode_item_pin(
542 xfs_inode_log_item_t *iip) 531 struct xfs_log_item *lip)
543{ 532{
544 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); 533 struct xfs_inode *ip = INODE_ITEM(lip)->ili_inode;
534
535 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
545 536
546 trace_xfs_inode_pin(iip->ili_inode, _RET_IP_); 537 trace_xfs_inode_pin(ip, _RET_IP_);
547 atomic_inc(&iip->ili_inode->i_pincount); 538 atomic_inc(&ip->i_pincount);
548} 539}
549 540
550 541
@@ -554,12 +545,12 @@ xfs_inode_item_pin(
554 * 545 *
555 * Also wake up anyone in xfs_iunpin_wait() if the count goes to 0. 546 * Also wake up anyone in xfs_iunpin_wait() if the count goes to 0.
556 */ 547 */
557/* ARGSUSED */
558STATIC void 548STATIC void
559xfs_inode_item_unpin( 549xfs_inode_item_unpin(
560 xfs_inode_log_item_t *iip) 550 struct xfs_log_item *lip,
551 int remove)
561{ 552{
562 struct xfs_inode *ip = iip->ili_inode; 553 struct xfs_inode *ip = INODE_ITEM(lip)->ili_inode;
563 554
564 trace_xfs_inode_unpin(ip, _RET_IP_); 555 trace_xfs_inode_unpin(ip, _RET_IP_);
565 ASSERT(atomic_read(&ip->i_pincount) > 0); 556 ASSERT(atomic_read(&ip->i_pincount) > 0);
@@ -567,15 +558,6 @@ xfs_inode_item_unpin(
567 wake_up(&ip->i_ipin_wait); 558 wake_up(&ip->i_ipin_wait);
568} 559}
569 560
570/* ARGSUSED */
571STATIC void
572xfs_inode_item_unpin_remove(
573 xfs_inode_log_item_t *iip,
574 xfs_trans_t *tp)
575{
576 xfs_inode_item_unpin(iip);
577}
578
579/* 561/*
580 * This is called to attempt to lock the inode associated with this 562 * This is called to attempt to lock the inode associated with this
581 * inode log item, in preparation for the push routine which does the actual 563 * inode log item, in preparation for the push routine which does the actual
@@ -591,19 +573,16 @@ xfs_inode_item_unpin_remove(
591 */ 573 */
592STATIC uint 574STATIC uint
593xfs_inode_item_trylock( 575xfs_inode_item_trylock(
594 xfs_inode_log_item_t *iip) 576 struct xfs_log_item *lip)
595{ 577{
596 register xfs_inode_t *ip; 578 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
597 579 struct xfs_inode *ip = iip->ili_inode;
598 ip = iip->ili_inode;
599 580
600 if (xfs_ipincount(ip) > 0) { 581 if (xfs_ipincount(ip) > 0)
601 return XFS_ITEM_PINNED; 582 return XFS_ITEM_PINNED;
602 }
603 583
604 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { 584 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
605 return XFS_ITEM_LOCKED; 585 return XFS_ITEM_LOCKED;
606 }
607 586
608 if (!xfs_iflock_nowait(ip)) { 587 if (!xfs_iflock_nowait(ip)) {
609 /* 588 /*
@@ -629,7 +608,7 @@ xfs_inode_item_trylock(
629 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 608 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
630 ASSERT(iip->ili_format.ilf_fields != 0); 609 ASSERT(iip->ili_format.ilf_fields != 0);
631 ASSERT(iip->ili_logged == 0); 610 ASSERT(iip->ili_logged == 0);
632 ASSERT(iip->ili_item.li_flags & XFS_LI_IN_AIL); 611 ASSERT(lip->li_flags & XFS_LI_IN_AIL);
633 } 612 }
634#endif 613#endif
635 return XFS_ITEM_SUCCESS; 614 return XFS_ITEM_SUCCESS;
@@ -643,26 +622,18 @@ xfs_inode_item_trylock(
643 */ 622 */
644STATIC void 623STATIC void
645xfs_inode_item_unlock( 624xfs_inode_item_unlock(
646 xfs_inode_log_item_t *iip) 625 struct xfs_log_item *lip)
647{ 626{
648 uint hold; 627 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
649 uint iolocked; 628 struct xfs_inode *ip = iip->ili_inode;
650 uint lock_flags; 629 unsigned short lock_flags;
651 xfs_inode_t *ip;
652 630
653 ASSERT(iip != NULL);
654 ASSERT(iip->ili_inode->i_itemp != NULL); 631 ASSERT(iip->ili_inode->i_itemp != NULL);
655 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); 632 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
656 ASSERT((!(iip->ili_inode->i_itemp->ili_flags & 633
657 XFS_ILI_IOLOCKED_EXCL)) ||
658 xfs_isilocked(iip->ili_inode, XFS_IOLOCK_EXCL));
659 ASSERT((!(iip->ili_inode->i_itemp->ili_flags &
660 XFS_ILI_IOLOCKED_SHARED)) ||
661 xfs_isilocked(iip->ili_inode, XFS_IOLOCK_SHARED));
662 /* 634 /*
663 * Clear the transaction pointer in the inode. 635 * Clear the transaction pointer in the inode.
664 */ 636 */
665 ip = iip->ili_inode;
666 ip->i_transp = NULL; 637 ip->i_transp = NULL;
667 638
668 /* 639 /*
@@ -686,34 +657,11 @@ xfs_inode_item_unlock(
686 iip->ili_aextents_buf = NULL; 657 iip->ili_aextents_buf = NULL;
687 } 658 }
688 659
689 /* 660 lock_flags = iip->ili_lock_flags;
690 * Figure out if we should unlock the inode or not. 661 iip->ili_lock_flags = 0;
691 */ 662 if (lock_flags) {
692 hold = iip->ili_flags & XFS_ILI_HOLD; 663 xfs_iunlock(iip->ili_inode, lock_flags);
693 664 IRELE(iip->ili_inode);
694 /*
695 * Before clearing out the flags, remember whether we
696 * are holding the inode's IO lock.
697 */
698 iolocked = iip->ili_flags & XFS_ILI_IOLOCKED_ANY;
699
700 /*
701 * Clear out the fields of the inode log item particular
702 * to the current transaction.
703 */
704 iip->ili_flags = 0;
705
706 /*
707 * Unlock the inode if XFS_ILI_HOLD was not set.
708 */
709 if (!hold) {
710 lock_flags = XFS_ILOCK_EXCL;
711 if (iolocked & XFS_ILI_IOLOCKED_EXCL) {
712 lock_flags |= XFS_IOLOCK_EXCL;
713 } else if (iolocked & XFS_ILI_IOLOCKED_SHARED) {
714 lock_flags |= XFS_IOLOCK_SHARED;
715 }
716 xfs_iput(iip->ili_inode, lock_flags);
717 } 665 }
718} 666}
719 667
@@ -725,13 +673,12 @@ xfs_inode_item_unlock(
725 * is the only one that matters. Therefore, simply return the 673 * is the only one that matters. Therefore, simply return the
726 * given lsn. 674 * given lsn.
727 */ 675 */
728/*ARGSUSED*/
729STATIC xfs_lsn_t 676STATIC xfs_lsn_t
730xfs_inode_item_committed( 677xfs_inode_item_committed(
731 xfs_inode_log_item_t *iip, 678 struct xfs_log_item *lip,
732 xfs_lsn_t lsn) 679 xfs_lsn_t lsn)
733{ 680{
734 return (lsn); 681 return lsn;
735} 682}
736 683
737/* 684/*
@@ -743,13 +690,12 @@ xfs_inode_item_committed(
743 */ 690 */
744STATIC void 691STATIC void
745xfs_inode_item_pushbuf( 692xfs_inode_item_pushbuf(
746 xfs_inode_log_item_t *iip) 693 struct xfs_log_item *lip)
747{ 694{
748 xfs_inode_t *ip; 695 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
749 xfs_mount_t *mp; 696 struct xfs_inode *ip = iip->ili_inode;
750 xfs_buf_t *bp; 697 struct xfs_buf *bp;
751 698
752 ip = iip->ili_inode;
753 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); 699 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
754 700
755 /* 701 /*
@@ -757,14 +703,13 @@ xfs_inode_item_pushbuf(
757 * inode was taken off the AIL. So, just get out. 703 * inode was taken off the AIL. So, just get out.
758 */ 704 */
759 if (completion_done(&ip->i_flush) || 705 if (completion_done(&ip->i_flush) ||
760 ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) { 706 !(lip->li_flags & XFS_LI_IN_AIL)) {
761 xfs_iunlock(ip, XFS_ILOCK_SHARED); 707 xfs_iunlock(ip, XFS_ILOCK_SHARED);
762 return; 708 return;
763 } 709 }
764 710
765 mp = ip->i_mount; 711 bp = xfs_incore(ip->i_mount->m_ddev_targp, iip->ili_format.ilf_blkno,
766 bp = xfs_incore(mp->m_ddev_targp, iip->ili_format.ilf_blkno, 712 iip->ili_format.ilf_len, XBF_TRYLOCK);
767 iip->ili_format.ilf_len, XBF_TRYLOCK);
768 713
769 xfs_iunlock(ip, XFS_ILOCK_SHARED); 714 xfs_iunlock(ip, XFS_ILOCK_SHARED);
770 if (!bp) 715 if (!bp)
@@ -772,10 +717,8 @@ xfs_inode_item_pushbuf(
772 if (XFS_BUF_ISDELAYWRITE(bp)) 717 if (XFS_BUF_ISDELAYWRITE(bp))
773 xfs_buf_delwri_promote(bp); 718 xfs_buf_delwri_promote(bp);
774 xfs_buf_relse(bp); 719 xfs_buf_relse(bp);
775 return;
776} 720}
777 721
778
779/* 722/*
780 * This is called to asynchronously write the inode associated with this 723 * This is called to asynchronously write the inode associated with this
781 * inode log item out to disk. The inode will already have been locked by 724 * inode log item out to disk. The inode will already have been locked by
@@ -783,14 +726,14 @@ xfs_inode_item_pushbuf(
783 */ 726 */
784STATIC void 727STATIC void
785xfs_inode_item_push( 728xfs_inode_item_push(
786 xfs_inode_log_item_t *iip) 729 struct xfs_log_item *lip)
787{ 730{
788 xfs_inode_t *ip; 731 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
789 732 struct xfs_inode *ip = iip->ili_inode;
790 ip = iip->ili_inode;
791 733
792 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); 734 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
793 ASSERT(!completion_done(&ip->i_flush)); 735 ASSERT(!completion_done(&ip->i_flush));
736
794 /* 737 /*
795 * Since we were able to lock the inode's flush lock and 738 * Since we were able to lock the inode's flush lock and
796 * we found it on the AIL, the inode must be dirty. This 739 * we found it on the AIL, the inode must be dirty. This
@@ -813,43 +756,34 @@ xfs_inode_item_push(
813 */ 756 */
814 (void) xfs_iflush(ip, 0); 757 (void) xfs_iflush(ip, 0);
815 xfs_iunlock(ip, XFS_ILOCK_SHARED); 758 xfs_iunlock(ip, XFS_ILOCK_SHARED);
816
817 return;
818} 759}
819 760
820/* 761/*
821 * XXX rcc - this one really has to do something. Probably needs 762 * XXX rcc - this one really has to do something. Probably needs
822 * to stamp in a new field in the incore inode. 763 * to stamp in a new field in the incore inode.
823 */ 764 */
824/* ARGSUSED */
825STATIC void 765STATIC void
826xfs_inode_item_committing( 766xfs_inode_item_committing(
827 xfs_inode_log_item_t *iip, 767 struct xfs_log_item *lip,
828 xfs_lsn_t lsn) 768 xfs_lsn_t lsn)
829{ 769{
830 iip->ili_last_lsn = lsn; 770 INODE_ITEM(lip)->ili_last_lsn = lsn;
831 return;
832} 771}
833 772
834/* 773/*
835 * This is the ops vector shared by all buf log items. 774 * This is the ops vector shared by all buf log items.
836 */ 775 */
837static struct xfs_item_ops xfs_inode_item_ops = { 776static struct xfs_item_ops xfs_inode_item_ops = {
838 .iop_size = (uint(*)(xfs_log_item_t*))xfs_inode_item_size, 777 .iop_size = xfs_inode_item_size,
839 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 778 .iop_format = xfs_inode_item_format,
840 xfs_inode_item_format, 779 .iop_pin = xfs_inode_item_pin,
841 .iop_pin = (void(*)(xfs_log_item_t*))xfs_inode_item_pin, 780 .iop_unpin = xfs_inode_item_unpin,
842 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_inode_item_unpin, 781 .iop_trylock = xfs_inode_item_trylock,
843 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*)) 782 .iop_unlock = xfs_inode_item_unlock,
844 xfs_inode_item_unpin_remove, 783 .iop_committed = xfs_inode_item_committed,
845 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_inode_item_trylock, 784 .iop_push = xfs_inode_item_push,
846 .iop_unlock = (void(*)(xfs_log_item_t*))xfs_inode_item_unlock, 785 .iop_pushbuf = xfs_inode_item_pushbuf,
847 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) 786 .iop_committing = xfs_inode_item_committing
848 xfs_inode_item_committed,
849 .iop_push = (void(*)(xfs_log_item_t*))xfs_inode_item_push,
850 .iop_pushbuf = (void(*)(xfs_log_item_t*))xfs_inode_item_pushbuf,
851 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
852 xfs_inode_item_committing
853}; 787};
854 788
855 789
@@ -858,10 +792,10 @@ static struct xfs_item_ops xfs_inode_item_ops = {
858 */ 792 */
859void 793void
860xfs_inode_item_init( 794xfs_inode_item_init(
861 xfs_inode_t *ip, 795 struct xfs_inode *ip,
862 xfs_mount_t *mp) 796 struct xfs_mount *mp)
863{ 797{
864 xfs_inode_log_item_t *iip; 798 struct xfs_inode_log_item *iip;
865 799
866 ASSERT(ip->i_itemp == NULL); 800 ASSERT(ip->i_itemp == NULL);
867 iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP); 801 iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP);
@@ -899,14 +833,14 @@ xfs_inode_item_destroy(
899 * from the AIL if it has not been re-logged, and unlocking the inode's 833 * from the AIL if it has not been re-logged, and unlocking the inode's
900 * flush lock. 834 * flush lock.
901 */ 835 */
902/*ARGSUSED*/
903void 836void
904xfs_iflush_done( 837xfs_iflush_done(
905 xfs_buf_t *bp, 838 struct xfs_buf *bp,
906 xfs_inode_log_item_t *iip) 839 struct xfs_log_item *lip)
907{ 840{
841 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
908 xfs_inode_t *ip = iip->ili_inode; 842 xfs_inode_t *ip = iip->ili_inode;
909 struct xfs_ail *ailp = iip->ili_item.li_ailp; 843 struct xfs_ail *ailp = lip->li_ailp;
910 844
911 /* 845 /*
912 * We only want to pull the item from the AIL if it is 846 * We only want to pull the item from the AIL if it is
@@ -917,12 +851,11 @@ xfs_iflush_done(
917 * the lock since it's cheaper, and then we recheck while 851 * the lock since it's cheaper, and then we recheck while
918 * holding the lock before removing the inode from the AIL. 852 * holding the lock before removing the inode from the AIL.
919 */ 853 */
920 if (iip->ili_logged && 854 if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) {
921 (iip->ili_item.li_lsn == iip->ili_flush_lsn)) {
922 spin_lock(&ailp->xa_lock); 855 spin_lock(&ailp->xa_lock);
923 if (iip->ili_item.li_lsn == iip->ili_flush_lsn) { 856 if (lip->li_lsn == iip->ili_flush_lsn) {
924 /* xfs_trans_ail_delete() drops the AIL lock. */ 857 /* xfs_trans_ail_delete() drops the AIL lock. */
925 xfs_trans_ail_delete(ailp, (xfs_log_item_t*)iip); 858 xfs_trans_ail_delete(ailp, lip);
926 } else { 859 } else {
927 spin_unlock(&ailp->xa_lock); 860 spin_unlock(&ailp->xa_lock);
928 } 861 }
@@ -940,8 +873,6 @@ xfs_iflush_done(
940 * Release the inode's flush lock since we're done with it. 873 * Release the inode's flush lock since we're done with it.
941 */ 874 */
942 xfs_ifunlock(ip); 875 xfs_ifunlock(ip);
943
944 return;
945} 876}
946 877
947/* 878/*
@@ -957,10 +888,8 @@ xfs_iflush_abort(
957 xfs_inode_t *ip) 888 xfs_inode_t *ip)
958{ 889{
959 xfs_inode_log_item_t *iip = ip->i_itemp; 890 xfs_inode_log_item_t *iip = ip->i_itemp;
960 xfs_mount_t *mp;
961 891
962 iip = ip->i_itemp; 892 iip = ip->i_itemp;
963 mp = ip->i_mount;
964 if (iip) { 893 if (iip) {
965 struct xfs_ail *ailp = iip->ili_item.li_ailp; 894 struct xfs_ail *ailp = iip->ili_item.li_ailp;
966 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { 895 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
@@ -991,10 +920,10 @@ xfs_iflush_abort(
991 920
992void 921void
993xfs_istale_done( 922xfs_istale_done(
994 xfs_buf_t *bp, 923 struct xfs_buf *bp,
995 xfs_inode_log_item_t *iip) 924 struct xfs_log_item *lip)
996{ 925{
997 xfs_iflush_abort(iip->ili_inode); 926 xfs_iflush_abort(INODE_ITEM(lip)->ili_inode);
998} 927}
999 928
1000/* 929/*
@@ -1007,9 +936,8 @@ xfs_inode_item_format_convert(
1007 xfs_inode_log_format_t *in_f) 936 xfs_inode_log_format_t *in_f)
1008{ 937{
1009 if (buf->i_len == sizeof(xfs_inode_log_format_32_t)) { 938 if (buf->i_len == sizeof(xfs_inode_log_format_32_t)) {
1010 xfs_inode_log_format_32_t *in_f32; 939 xfs_inode_log_format_32_t *in_f32 = buf->i_addr;
1011 940
1012 in_f32 = (xfs_inode_log_format_32_t *)buf->i_addr;
1013 in_f->ilf_type = in_f32->ilf_type; 941 in_f->ilf_type = in_f32->ilf_type;
1014 in_f->ilf_size = in_f32->ilf_size; 942 in_f->ilf_size = in_f32->ilf_size;
1015 in_f->ilf_fields = in_f32->ilf_fields; 943 in_f->ilf_fields = in_f32->ilf_fields;
@@ -1025,9 +953,8 @@ xfs_inode_item_format_convert(
1025 in_f->ilf_boffset = in_f32->ilf_boffset; 953 in_f->ilf_boffset = in_f32->ilf_boffset;
1026 return 0; 954 return 0;
1027 } else if (buf->i_len == sizeof(xfs_inode_log_format_64_t)){ 955 } else if (buf->i_len == sizeof(xfs_inode_log_format_64_t)){
1028 xfs_inode_log_format_64_t *in_f64; 956 xfs_inode_log_format_64_t *in_f64 = buf->i_addr;
1029 957
1030 in_f64 = (xfs_inode_log_format_64_t *)buf->i_addr;
1031 in_f->ilf_type = in_f64->ilf_type; 958 in_f->ilf_type = in_f64->ilf_type;
1032 in_f->ilf_size = in_f64->ilf_size; 959 in_f->ilf_size = in_f64->ilf_size;
1033 in_f->ilf_fields = in_f64->ilf_fields; 960 in_f->ilf_fields = in_f64->ilf_fields;
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 9a467958ecdd..d3dee61e6d91 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -103,12 +103,6 @@ typedef struct xfs_inode_log_format_64 {
103 XFS_ILOG_ADATA | XFS_ILOG_AEXT | \ 103 XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
104 XFS_ILOG_ABROOT) 104 XFS_ILOG_ABROOT)
105 105
106#define XFS_ILI_HOLD 0x1
107#define XFS_ILI_IOLOCKED_EXCL 0x2
108#define XFS_ILI_IOLOCKED_SHARED 0x4
109
110#define XFS_ILI_IOLOCKED_ANY (XFS_ILI_IOLOCKED_EXCL | XFS_ILI_IOLOCKED_SHARED)
111
112static inline int xfs_ilog_fbroot(int w) 106static inline int xfs_ilog_fbroot(int w)
113{ 107{
114 return (w == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT); 108 return (w == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT);
@@ -137,7 +131,7 @@ typedef struct xfs_inode_log_item {
137 struct xfs_inode *ili_inode; /* inode ptr */ 131 struct xfs_inode *ili_inode; /* inode ptr */
138 xfs_lsn_t ili_flush_lsn; /* lsn at last flush */ 132 xfs_lsn_t ili_flush_lsn; /* lsn at last flush */
139 xfs_lsn_t ili_last_lsn; /* lsn at last transaction */ 133 xfs_lsn_t ili_last_lsn; /* lsn at last transaction */
140 unsigned short ili_flags; /* misc flags */ 134 unsigned short ili_lock_flags; /* lock flags */
141 unsigned short ili_logged; /* flushed logged data */ 135 unsigned short ili_logged; /* flushed logged data */
142 unsigned int ili_last_fields; /* fields when flushed */ 136 unsigned int ili_last_fields; /* fields when flushed */
143 struct xfs_bmbt_rec *ili_extents_buf; /* array of logged 137 struct xfs_bmbt_rec *ili_extents_buf; /* array of logged
@@ -161,8 +155,8 @@ static inline int xfs_inode_clean(xfs_inode_t *ip)
161 155
162extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *); 156extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
163extern void xfs_inode_item_destroy(struct xfs_inode *); 157extern void xfs_inode_item_destroy(struct xfs_inode *);
164extern void xfs_iflush_done(struct xfs_buf *, xfs_inode_log_item_t *); 158extern void xfs_iflush_done(struct xfs_buf *, struct xfs_log_item *);
165extern void xfs_istale_done(struct xfs_buf *, xfs_inode_log_item_t *); 159extern void xfs_istale_done(struct xfs_buf *, struct xfs_log_item *);
166extern void xfs_iflush_abort(struct xfs_inode *); 160extern void xfs_iflush_abort(struct xfs_inode *);
167extern int xfs_inode_item_format_convert(xfs_log_iovec_t *, 161extern int xfs_inode_item_format_convert(xfs_log_iovec_t *,
168 xfs_inode_log_format_t *); 162 xfs_inode_log_format_t *);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index ef14943829da..20576146369f 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -23,19 +23,14 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_alloc.h" 26#include "xfs_alloc.h"
28#include "xfs_dmapi.h"
29#include "xfs_quota.h" 27#include "xfs_quota.h"
30#include "xfs_mount.h" 28#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 30#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 31#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h" 32#include "xfs_dinode.h"
37#include "xfs_inode.h" 33#include "xfs_inode.h"
38#include "xfs_ialloc.h"
39#include "xfs_btree.h" 34#include "xfs_btree.h"
40#include "xfs_bmap.h" 35#include "xfs_bmap.h"
41#include "xfs_rtalloc.h" 36#include "xfs_rtalloc.h"
@@ -123,7 +118,7 @@ xfs_iomap(
123 error = xfs_bmapi(NULL, ip, offset_fsb, 118 error = xfs_bmapi(NULL, ip, offset_fsb,
124 (xfs_filblks_t)(end_fsb - offset_fsb), 119 (xfs_filblks_t)(end_fsb - offset_fsb),
125 bmapi_flags, NULL, 0, imap, 120 bmapi_flags, NULL, 0, imap,
126 nimaps, NULL, NULL); 121 nimaps, NULL);
127 122
128 if (error) 123 if (error)
129 goto out; 124 goto out;
@@ -138,7 +133,7 @@ xfs_iomap(
138 break; 133 break;
139 } 134 }
140 135
141 if (flags & (BMAPI_DIRECT|BMAPI_MMAP)) { 136 if (flags & BMAPI_DIRECT) {
142 error = xfs_iomap_write_direct(ip, offset, count, flags, 137 error = xfs_iomap_write_direct(ip, offset, count, flags,
143 imap, nimaps); 138 imap, nimaps);
144 } else { 139 } else {
@@ -247,7 +242,7 @@ xfs_iomap_write_direct(
247 xfs_off_t offset, 242 xfs_off_t offset,
248 size_t count, 243 size_t count,
249 int flags, 244 int flags,
250 xfs_bmbt_irec_t *ret_imap, 245 xfs_bmbt_irec_t *imap,
251 int *nmaps) 246 int *nmaps)
252{ 247{
253 xfs_mount_t *mp = ip->i_mount; 248 xfs_mount_t *mp = ip->i_mount;
@@ -261,7 +256,6 @@ xfs_iomap_write_direct(
261 int quota_flag; 256 int quota_flag;
262 int rt; 257 int rt;
263 xfs_trans_t *tp; 258 xfs_trans_t *tp;
264 xfs_bmbt_irec_t imap;
265 xfs_bmap_free_t free_list; 259 xfs_bmap_free_t free_list;
266 uint qblocks, resblks, resrtextents; 260 uint qblocks, resblks, resrtextents;
267 int committed; 261 int committed;
@@ -285,10 +279,10 @@ xfs_iomap_write_direct(
285 if (error) 279 if (error)
286 goto error_out; 280 goto error_out;
287 } else { 281 } else {
288 if (*nmaps && (ret_imap->br_startblock == HOLESTARTBLOCK)) 282 if (*nmaps && (imap->br_startblock == HOLESTARTBLOCK))
289 last_fsb = MIN(last_fsb, (xfs_fileoff_t) 283 last_fsb = MIN(last_fsb, (xfs_fileoff_t)
290 ret_imap->br_blockcount + 284 imap->br_blockcount +
291 ret_imap->br_startoff); 285 imap->br_startoff);
292 } 286 }
293 count_fsb = last_fsb - offset_fsb; 287 count_fsb = last_fsb - offset_fsb;
294 ASSERT(count_fsb > 0); 288 ASSERT(count_fsb > 0);
@@ -334,20 +328,22 @@ xfs_iomap_write_direct(
334 if (error) 328 if (error)
335 goto error1; 329 goto error1;
336 330
337 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 331 xfs_trans_ijoin(tp, ip);
338 xfs_trans_ihold(tp, ip);
339 332
340 bmapi_flag = XFS_BMAPI_WRITE; 333 bmapi_flag = XFS_BMAPI_WRITE;
341 if ((flags & BMAPI_DIRECT) && (offset < ip->i_size || extsz)) 334 if ((flags & BMAPI_DIRECT) && (offset < ip->i_size || extsz))
342 bmapi_flag |= XFS_BMAPI_PREALLOC; 335 bmapi_flag |= XFS_BMAPI_PREALLOC;
343 336
344 /* 337 /*
345 * Issue the xfs_bmapi() call to allocate the blocks 338 * Issue the xfs_bmapi() call to allocate the blocks.
339 *
340 * From this point onwards we overwrite the imap pointer that the
341 * caller gave to us.
346 */ 342 */
347 xfs_bmap_init(&free_list, &firstfsb); 343 xfs_bmap_init(&free_list, &firstfsb);
348 nimaps = 1; 344 nimaps = 1;
349 error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, bmapi_flag, 345 error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, bmapi_flag,
350 &firstfsb, 0, &imap, &nimaps, &free_list, NULL); 346 &firstfsb, 0, imap, &nimaps, &free_list);
351 if (error) 347 if (error)
352 goto error0; 348 goto error0;
353 349
@@ -369,12 +365,11 @@ xfs_iomap_write_direct(
369 goto error_out; 365 goto error_out;
370 } 366 }
371 367
372 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) { 368 if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip))) {
373 error = xfs_cmn_err_fsblock_zero(ip, &imap); 369 error = xfs_cmn_err_fsblock_zero(ip, imap);
374 goto error_out; 370 goto error_out;
375 } 371 }
376 372
377 *ret_imap = imap;
378 *nmaps = 1; 373 *nmaps = 1;
379 return 0; 374 return 0;
380 375
@@ -425,7 +420,7 @@ xfs_iomap_eof_want_preallocate(
425 imaps = nimaps; 420 imaps = nimaps;
426 firstblock = NULLFSBLOCK; 421 firstblock = NULLFSBLOCK;
427 error = xfs_bmapi(NULL, ip, start_fsb, count_fsb, 0, 422 error = xfs_bmapi(NULL, ip, start_fsb, count_fsb, 0,
428 &firstblock, 0, imap, &imaps, NULL, NULL); 423 &firstblock, 0, imap, &imaps, NULL);
429 if (error) 424 if (error)
430 return error; 425 return error;
431 for (n = 0; n < imaps; n++) { 426 for (n = 0; n < imaps; n++) {
@@ -500,7 +495,7 @@ retry:
500 (xfs_filblks_t)(last_fsb - offset_fsb), 495 (xfs_filblks_t)(last_fsb - offset_fsb),
501 XFS_BMAPI_DELAY | XFS_BMAPI_WRITE | 496 XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
502 XFS_BMAPI_ENTIRE, &firstblock, 1, imap, 497 XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
503 &nimaps, NULL, NULL); 498 &nimaps, NULL);
504 if (error && (error != ENOSPC)) 499 if (error && (error != ENOSPC))
505 return XFS_ERROR(error); 500 return XFS_ERROR(error);
506 501
@@ -548,7 +543,7 @@ xfs_iomap_write_allocate(
548 xfs_inode_t *ip, 543 xfs_inode_t *ip,
549 xfs_off_t offset, 544 xfs_off_t offset,
550 size_t count, 545 size_t count,
551 xfs_bmbt_irec_t *map, 546 xfs_bmbt_irec_t *imap,
552 int *retmap) 547 int *retmap)
553{ 548{
554 xfs_mount_t *mp = ip->i_mount; 549 xfs_mount_t *mp = ip->i_mount;
@@ -557,7 +552,6 @@ xfs_iomap_write_allocate(
557 xfs_fsblock_t first_block; 552 xfs_fsblock_t first_block;
558 xfs_bmap_free_t free_list; 553 xfs_bmap_free_t free_list;
559 xfs_filblks_t count_fsb; 554 xfs_filblks_t count_fsb;
560 xfs_bmbt_irec_t imap;
561 xfs_trans_t *tp; 555 xfs_trans_t *tp;
562 int nimaps, committed; 556 int nimaps, committed;
563 int error = 0; 557 int error = 0;
@@ -573,8 +567,8 @@ xfs_iomap_write_allocate(
573 return XFS_ERROR(error); 567 return XFS_ERROR(error);
574 568
575 offset_fsb = XFS_B_TO_FSBT(mp, offset); 569 offset_fsb = XFS_B_TO_FSBT(mp, offset);
576 count_fsb = map->br_blockcount; 570 count_fsb = imap->br_blockcount;
577 map_start_fsb = map->br_startoff; 571 map_start_fsb = imap->br_startoff;
578 572
579 XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb)); 573 XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb));
580 574
@@ -602,8 +596,7 @@ xfs_iomap_write_allocate(
602 return XFS_ERROR(error); 596 return XFS_ERROR(error);
603 } 597 }
604 xfs_ilock(ip, XFS_ILOCK_EXCL); 598 xfs_ilock(ip, XFS_ILOCK_EXCL);
605 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 599 xfs_trans_ijoin(tp, ip);
606 xfs_trans_ihold(tp, ip);
607 600
608 xfs_bmap_init(&free_list, &first_block); 601 xfs_bmap_init(&free_list, &first_block);
609 602
@@ -654,10 +647,15 @@ xfs_iomap_write_allocate(
654 } 647 }
655 } 648 }
656 649
657 /* Go get the actual blocks */ 650 /*
651 * Go get the actual blocks.
652 *
653 * From this point onwards we overwrite the imap
654 * pointer that the caller gave to us.
655 */
658 error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb, 656 error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb,
659 XFS_BMAPI_WRITE, &first_block, 1, 657 XFS_BMAPI_WRITE, &first_block, 1,
660 &imap, &nimaps, &free_list, NULL); 658 imap, &nimaps, &free_list);
661 if (error) 659 if (error)
662 goto trans_cancel; 660 goto trans_cancel;
663 661
@@ -676,13 +674,12 @@ xfs_iomap_write_allocate(
676 * See if we were able to allocate an extent that 674 * See if we were able to allocate an extent that
677 * covers at least part of the callers request 675 * covers at least part of the callers request
678 */ 676 */
679 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) 677 if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip)))
680 return xfs_cmn_err_fsblock_zero(ip, &imap); 678 return xfs_cmn_err_fsblock_zero(ip, imap);
681 679
682 if ((offset_fsb >= imap.br_startoff) && 680 if ((offset_fsb >= imap->br_startoff) &&
683 (offset_fsb < (imap.br_startoff + 681 (offset_fsb < (imap->br_startoff +
684 imap.br_blockcount))) { 682 imap->br_blockcount))) {
685 *map = imap;
686 *retmap = 1; 683 *retmap = 1;
687 XFS_STATS_INC(xs_xstrat_quick); 684 XFS_STATS_INC(xs_xstrat_quick);
688 return 0; 685 return 0;
@@ -692,8 +689,8 @@ xfs_iomap_write_allocate(
692 * So far we have not mapped the requested part of the 689 * So far we have not mapped the requested part of the
693 * file, just surrounding data, try again. 690 * file, just surrounding data, try again.
694 */ 691 */
695 count_fsb -= imap.br_blockcount; 692 count_fsb -= imap->br_blockcount;
696 map_start_fsb = imap.br_startoff + imap.br_blockcount; 693 map_start_fsb = imap->br_startoff + imap->br_blockcount;
697 } 694 }
698 695
699trans_cancel: 696trans_cancel:
@@ -766,8 +763,7 @@ xfs_iomap_write_unwritten(
766 } 763 }
767 764
768 xfs_ilock(ip, XFS_ILOCK_EXCL); 765 xfs_ilock(ip, XFS_ILOCK_EXCL);
769 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 766 xfs_trans_ijoin(tp, ip);
770 xfs_trans_ihold(tp, ip);
771 767
772 /* 768 /*
773 * Modify the unwritten extent state of the buffer. 769 * Modify the unwritten extent state of the buffer.
@@ -776,7 +772,7 @@ xfs_iomap_write_unwritten(
776 nimaps = 1; 772 nimaps = 1;
777 error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, 773 error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,
778 XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb, 774 XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb,
779 1, &imap, &nimaps, &free_list, NULL); 775 1, &imap, &nimaps, &free_list);
780 if (error) 776 if (error)
781 goto error_on_bmapi_transaction; 777 goto error_on_bmapi_transaction;
782 778
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 81ac4afd45b3..7748a430f50d 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -18,17 +18,16 @@
18#ifndef __XFS_IOMAP_H__ 18#ifndef __XFS_IOMAP_H__
19#define __XFS_IOMAP_H__ 19#define __XFS_IOMAP_H__
20 20
21typedef enum { 21/* base extent manipulation calls */
22 /* base extent manipulation calls */ 22#define BMAPI_READ (1 << 0) /* read extents */
23 BMAPI_READ = (1 << 0), /* read extents */ 23#define BMAPI_WRITE (1 << 1) /* create extents */
24 BMAPI_WRITE = (1 << 1), /* create extents */ 24#define BMAPI_ALLOCATE (1 << 2) /* delayed allocate to real extents */
25 BMAPI_ALLOCATE = (1 << 2), /* delayed allocate to real extents */ 25
26 /* modifiers */ 26/* modifiers */
27 BMAPI_IGNSTATE = (1 << 4), /* ignore unwritten state on read */ 27#define BMAPI_IGNSTATE (1 << 4) /* ignore unwritten state on read */
28 BMAPI_DIRECT = (1 << 5), /* direct instead of buffered write */ 28#define BMAPI_DIRECT (1 << 5) /* direct instead of buffered write */
29 BMAPI_MMAP = (1 << 6), /* allocate for mmap write */ 29#define BMAPI_MMA (1 << 6) /* allocate for mmap write */
30 BMAPI_TRYLOCK = (1 << 7), /* non-blocking request */ 30#define BMAPI_TRYLOCK (1 << 7) /* non-blocking request */
31} bmapi_flags_t;
32 31
33#define BMAPI_FLAGS \ 32#define BMAPI_FLAGS \
34 { BMAPI_READ, "READ" }, \ 33 { BMAPI_READ, "READ" }, \
@@ -36,7 +35,6 @@ typedef enum {
36 { BMAPI_ALLOCATE, "ALLOCATE" }, \ 35 { BMAPI_ALLOCATE, "ALLOCATE" }, \
37 { BMAPI_IGNSTATE, "IGNSTATE" }, \ 36 { BMAPI_IGNSTATE, "IGNSTATE" }, \
38 { BMAPI_DIRECT, "DIRECT" }, \ 37 { BMAPI_DIRECT, "DIRECT" }, \
39 { BMAPI_MMAP, "MMAP" }, \
40 { BMAPI_TRYLOCK, "TRYLOCK" } 38 { BMAPI_TRYLOCK, "TRYLOCK" }
41 39
42struct xfs_inode; 40struct xfs_inode;
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 2b86f8610512..7e3626e5925c 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -24,20 +24,17 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_ialloc.h" 33#include "xfs_ialloc.h"
38#include "xfs_itable.h" 34#include "xfs_itable.h"
39#include "xfs_error.h" 35#include "xfs_error.h"
40#include "xfs_btree.h" 36#include "xfs_btree.h"
37#include "xfs_trace.h"
41 38
42STATIC int 39STATIC int
43xfs_internal_inum( 40xfs_internal_inum(
@@ -143,7 +140,8 @@ xfs_bulkstat_one_int(
143 buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks; 140 buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks;
144 break; 141 break;
145 } 142 }
146 xfs_iput(ip, XFS_ILOCK_SHARED); 143 xfs_iunlock(ip, XFS_ILOCK_SHARED);
144 IRELE(ip);
147 145
148 error = formatter(buffer, ubsize, ubused, buf); 146 error = formatter(buffer, ubsize, ubused, buf);
149 147
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 5215abc8023a..925d572bf0f4 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -24,8 +24,6 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_error.h" 28#include "xfs_error.h"
31#include "xfs_log_priv.h" 29#include "xfs_log_priv.h"
@@ -35,8 +33,6 @@
35#include "xfs_ialloc_btree.h" 33#include "xfs_ialloc_btree.h"
36#include "xfs_log_recover.h" 34#include "xfs_log_recover.h"
37#include "xfs_trans_priv.h" 35#include "xfs_trans_priv.h"
38#include "xfs_dir2_sf.h"
39#include "xfs_attr_sf.h"
40#include "xfs_dinode.h" 36#include "xfs_dinode.h"
41#include "xfs_inode.h" 37#include "xfs_inode.h"
42#include "xfs_rw.h" 38#include "xfs_rw.h"
@@ -337,7 +333,6 @@ xfs_log_reserve(
337 int retval = 0; 333 int retval = 0;
338 334
339 ASSERT(client == XFS_TRANSACTION || client == XFS_LOG); 335 ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);
340 ASSERT((flags & XFS_LOG_NOSLEEP) == 0);
341 336
342 if (XLOG_FORCED_SHUTDOWN(log)) 337 if (XLOG_FORCED_SHUTDOWN(log))
343 return XFS_ERROR(EIO); 338 return XFS_ERROR(EIO);
@@ -552,7 +547,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
552 .magic = XLOG_UNMOUNT_TYPE, 547 .magic = XLOG_UNMOUNT_TYPE,
553 }; 548 };
554 struct xfs_log_iovec reg = { 549 struct xfs_log_iovec reg = {
555 .i_addr = (void *)&magic, 550 .i_addr = &magic,
556 .i_len = sizeof(magic), 551 .i_len = sizeof(magic),
557 .i_type = XLOG_REG_TYPE_UNMOUNT, 552 .i_type = XLOG_REG_TYPE_UNMOUNT,
558 }; 553 };
@@ -1047,7 +1042,6 @@ xlog_alloc_log(xfs_mount_t *mp,
1047 xlog_in_core_t *iclog, *prev_iclog=NULL; 1042 xlog_in_core_t *iclog, *prev_iclog=NULL;
1048 xfs_buf_t *bp; 1043 xfs_buf_t *bp;
1049 int i; 1044 int i;
1050 int iclogsize;
1051 int error = ENOMEM; 1045 int error = ENOMEM;
1052 uint log2_size = 0; 1046 uint log2_size = 0;
1053 1047
@@ -1127,7 +1121,6 @@ xlog_alloc_log(xfs_mount_t *mp,
1127 * with different amounts of memory. See the definition of 1121 * with different amounts of memory. See the definition of
1128 * xlog_in_core_t in xfs_log_priv.h for details. 1122 * xlog_in_core_t in xfs_log_priv.h for details.
1129 */ 1123 */
1130 iclogsize = log->l_iclog_size;
1131 ASSERT(log->l_iclog_size >= 4096); 1124 ASSERT(log->l_iclog_size >= 4096);
1132 for (i=0; i < log->l_iclog_bufs; i++) { 1125 for (i=0; i < log->l_iclog_bufs; i++) {
1133 *iclogp = kmem_zalloc(sizeof(xlog_in_core_t), KM_MAYFAIL); 1126 *iclogp = kmem_zalloc(sizeof(xlog_in_core_t), KM_MAYFAIL);
@@ -1428,11 +1421,8 @@ xlog_sync(xlog_t *log,
1428 XFS_BUF_BUSY(bp); 1421 XFS_BUF_BUSY(bp);
1429 XFS_BUF_ASYNC(bp); 1422 XFS_BUF_ASYNC(bp);
1430 bp->b_flags |= XBF_LOG_BUFFER; 1423 bp->b_flags |= XBF_LOG_BUFFER;
1431 /* 1424
1432 * Do an ordered write for the log block. 1425 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
1433 * Its unnecessary to flush the first split block in the log wrap case.
1434 */
1435 if (!split && (log->l_mp->m_flags & XFS_MOUNT_BARRIER))
1436 XFS_BUF_ORDERED(bp); 1426 XFS_BUF_ORDERED(bp);
1437 1427
1438 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); 1428 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 04c78e642cc8..916eb7db14d9 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -55,14 +55,10 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
55/* 55/*
56 * Flags to xfs_log_reserve() 56 * Flags to xfs_log_reserve()
57 * 57 *
58 * XFS_LOG_SLEEP: If space is not available, sleep (default)
59 * XFS_LOG_NOSLEEP: If space is not available, return error
60 * XFS_LOG_PERM_RESERV: Permanent reservation. When writes are 58 * XFS_LOG_PERM_RESERV: Permanent reservation. When writes are
61 * performed against this type of reservation, the reservation 59 * performed against this type of reservation, the reservation
62 * is not decreased. Long running transactions should use this. 60 * is not decreased. Long running transactions should use this.
63 */ 61 */
64#define XFS_LOG_SLEEP 0x0
65#define XFS_LOG_NOSLEEP 0x1
66#define XFS_LOG_PERM_RESERV 0x2 62#define XFS_LOG_PERM_RESERV 0x2
67 63
68/* 64/*
@@ -104,7 +100,7 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
104#define XLOG_REG_TYPE_MAX 19 100#define XLOG_REG_TYPE_MAX 19
105 101
106typedef struct xfs_log_iovec { 102typedef struct xfs_log_iovec {
107 xfs_caddr_t i_addr; /* beginning address of region */ 103 void *i_addr; /* beginning address of region */
108 int i_len; /* length in bytes of region */ 104 int i_len; /* length in bytes of region */
109 uint i_type; /* type of region */ 105 uint i_type; /* type of region */
110} xfs_log_iovec_t; 106} xfs_log_iovec_t;
@@ -201,9 +197,4 @@ int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
201bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); 197bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
202 198
203#endif 199#endif
204
205
206extern int xlog_debug; /* set to 1 to enable real log */
207
208
209#endif /* __XFS_LOG_H__ */ 200#endif /* __XFS_LOG_H__ */
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index bb17cc044bf3..31e4ea2d19ac 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -26,8 +26,6 @@
26#include "xfs_log_priv.h" 26#include "xfs_log_priv.h"
27#include "xfs_sb.h" 27#include "xfs_sb.h"
28#include "xfs_ag.h" 28#include "xfs_ag.h"
29#include "xfs_dir2.h"
30#include "xfs_dmapi.h"
31#include "xfs_mount.h" 29#include "xfs_mount.h"
32#include "xfs_error.h" 30#include "xfs_error.h"
33#include "xfs_alloc.h" 31#include "xfs_alloc.h"
@@ -554,7 +552,7 @@ xlog_cil_push(
554 thdr.th_type = XFS_TRANS_CHECKPOINT; 552 thdr.th_type = XFS_TRANS_CHECKPOINT;
555 thdr.th_tid = tic->t_tid; 553 thdr.th_tid = tic->t_tid;
556 thdr.th_num_items = num_iovecs; 554 thdr.th_num_items = num_iovecs;
557 lhdr.i_addr = (xfs_caddr_t)&thdr; 555 lhdr.i_addr = &thdr;
558 lhdr.i_len = sizeof(xfs_trans_header_t); 556 lhdr.i_len = sizeof(xfs_trans_header_t);
559 lhdr.i_type = XLOG_REG_TYPE_TRANSHDR; 557 lhdr.i_type = XLOG_REG_TYPE_TRANSHDR;
560 tic->t_curr_res -= lhdr.i_len + sizeof(xlog_op_header_t); 558 tic->t_curr_res -= lhdr.i_len + sizeof(xlog_op_header_t);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 9ac5cfab27b9..6f3f5fa37acf 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -24,15 +24,11 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_error.h" 28#include "xfs_error.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 30#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 31#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h" 32#include "xfs_dinode.h"
37#include "xfs_inode.h" 33#include "xfs_inode.h"
38#include "xfs_inode_item.h" 34#include "xfs_inode_item.h"
@@ -1565,9 +1561,7 @@ xlog_recover_reorder_trans(
1565 1561
1566 list_splice_init(&trans->r_itemq, &sort_list); 1562 list_splice_init(&trans->r_itemq, &sort_list);
1567 list_for_each_entry_safe(item, n, &sort_list, ri_list) { 1563 list_for_each_entry_safe(item, n, &sort_list, ri_list) {
1568 xfs_buf_log_format_t *buf_f; 1564 xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
1569
1570 buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr;
1571 1565
1572 switch (ITEM_TYPE(item)) { 1566 switch (ITEM_TYPE(item)) {
1573 case XFS_LI_BUF: 1567 case XFS_LI_BUF:
@@ -1892,9 +1886,8 @@ xlog_recover_do_inode_buffer(
1892 * current di_next_unlinked field. Extract its value 1886 * current di_next_unlinked field. Extract its value
1893 * and copy it to the buffer copy. 1887 * and copy it to the buffer copy.
1894 */ 1888 */
1895 logged_nextp = (xfs_agino_t *) 1889 logged_nextp = item->ri_buf[item_index].i_addr +
1896 ((char *)(item->ri_buf[item_index].i_addr) + 1890 next_unlinked_offset - reg_buf_offset;
1897 (next_unlinked_offset - reg_buf_offset));
1898 if (unlikely(*logged_nextp == 0)) { 1891 if (unlikely(*logged_nextp == 0)) {
1899 xfs_fs_cmn_err(CE_ALERT, mp, 1892 xfs_fs_cmn_err(CE_ALERT, mp,
1900 "bad inode buffer log record (ptr = 0x%p, bp = 0x%p). XFS trying to replay bad (0) inode di_next_unlinked field", 1893 "bad inode buffer log record (ptr = 0x%p, bp = 0x%p). XFS trying to replay bad (0) inode di_next_unlinked field",
@@ -1973,8 +1966,7 @@ xlog_recover_do_reg_buffer(
1973 item->ri_buf[i].i_len, __func__); 1966 item->ri_buf[i].i_len, __func__);
1974 goto next; 1967 goto next;
1975 } 1968 }
1976 error = xfs_qm_dqcheck((xfs_disk_dquot_t *) 1969 error = xfs_qm_dqcheck(item->ri_buf[i].i_addr,
1977 item->ri_buf[i].i_addr,
1978 -1, 0, XFS_QMOPT_DOWARN, 1970 -1, 0, XFS_QMOPT_DOWARN,
1979 "dquot_buf_recover"); 1971 "dquot_buf_recover");
1980 if (error) 1972 if (error)
@@ -2187,7 +2179,7 @@ xlog_recover_do_buffer_trans(
2187 xlog_recover_item_t *item, 2179 xlog_recover_item_t *item,
2188 int pass) 2180 int pass)
2189{ 2181{
2190 xfs_buf_log_format_t *buf_f; 2182 xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
2191 xfs_mount_t *mp; 2183 xfs_mount_t *mp;
2192 xfs_buf_t *bp; 2184 xfs_buf_t *bp;
2193 int error; 2185 int error;
@@ -2197,8 +2189,6 @@ xlog_recover_do_buffer_trans(
2197 ushort flags; 2189 ushort flags;
2198 uint buf_flags; 2190 uint buf_flags;
2199 2191
2200 buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr;
2201
2202 if (pass == XLOG_RECOVER_PASS1) { 2192 if (pass == XLOG_RECOVER_PASS1) {
2203 /* 2193 /*
2204 * In this pass we're only looking for buf items 2194 * In this pass we're only looking for buf items
@@ -2319,10 +2309,9 @@ xlog_recover_do_inode_trans(
2319 } 2309 }
2320 2310
2321 if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) { 2311 if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) {
2322 in_f = (xfs_inode_log_format_t *)item->ri_buf[0].i_addr; 2312 in_f = item->ri_buf[0].i_addr;
2323 } else { 2313 } else {
2324 in_f = (xfs_inode_log_format_t *)kmem_alloc( 2314 in_f = kmem_alloc(sizeof(xfs_inode_log_format_t), KM_SLEEP);
2325 sizeof(xfs_inode_log_format_t), KM_SLEEP);
2326 need_free = 1; 2315 need_free = 1;
2327 error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f); 2316 error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
2328 if (error) 2317 if (error)
@@ -2370,7 +2359,7 @@ xlog_recover_do_inode_trans(
2370 error = EFSCORRUPTED; 2359 error = EFSCORRUPTED;
2371 goto error; 2360 goto error;
2372 } 2361 }
2373 dicp = (xfs_icdinode_t *)(item->ri_buf[1].i_addr); 2362 dicp = item->ri_buf[1].i_addr;
2374 if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) { 2363 if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) {
2375 xfs_buf_relse(bp); 2364 xfs_buf_relse(bp);
2376 xfs_fs_cmn_err(CE_ALERT, mp, 2365 xfs_fs_cmn_err(CE_ALERT, mp,
@@ -2461,7 +2450,7 @@ xlog_recover_do_inode_trans(
2461 } 2450 }
2462 2451
2463 /* The core is in in-core format */ 2452 /* The core is in in-core format */
2464 xfs_dinode_to_disk(dip, (xfs_icdinode_t *)item->ri_buf[1].i_addr); 2453 xfs_dinode_to_disk(dip, item->ri_buf[1].i_addr);
2465 2454
2466 /* the rest is in on-disk format */ 2455 /* the rest is in on-disk format */
2467 if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) { 2456 if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) {
@@ -2578,7 +2567,7 @@ xlog_recover_do_quotaoff_trans(
2578 return (0); 2567 return (0);
2579 } 2568 }
2580 2569
2581 qoff_f = (xfs_qoff_logformat_t *)item->ri_buf[0].i_addr; 2570 qoff_f = item->ri_buf[0].i_addr;
2582 ASSERT(qoff_f); 2571 ASSERT(qoff_f);
2583 2572
2584 /* 2573 /*
@@ -2622,9 +2611,8 @@ xlog_recover_do_dquot_trans(
2622 if (mp->m_qflags == 0) 2611 if (mp->m_qflags == 0)
2623 return (0); 2612 return (0);
2624 2613
2625 recddq = (xfs_disk_dquot_t *)item->ri_buf[1].i_addr; 2614 recddq = item->ri_buf[1].i_addr;
2626 2615 if (recddq == NULL) {
2627 if (item->ri_buf[1].i_addr == NULL) {
2628 cmn_err(CE_ALERT, 2616 cmn_err(CE_ALERT,
2629 "XFS: NULL dquot in %s.", __func__); 2617 "XFS: NULL dquot in %s.", __func__);
2630 return XFS_ERROR(EIO); 2618 return XFS_ERROR(EIO);
@@ -2654,7 +2642,7 @@ xlog_recover_do_dquot_trans(
2654 * The other possibility, of course, is that the quota subsystem was 2642 * The other possibility, of course, is that the quota subsystem was
2655 * removed since the last mount - ENOSYS. 2643 * removed since the last mount - ENOSYS.
2656 */ 2644 */
2657 dq_f = (xfs_dq_logformat_t *)item->ri_buf[0].i_addr; 2645 dq_f = item->ri_buf[0].i_addr;
2658 ASSERT(dq_f); 2646 ASSERT(dq_f);
2659 if ((error = xfs_qm_dqcheck(recddq, 2647 if ((error = xfs_qm_dqcheck(recddq,
2660 dq_f->qlf_id, 2648 dq_f->qlf_id,
@@ -2721,7 +2709,7 @@ xlog_recover_do_efi_trans(
2721 return 0; 2709 return 0;
2722 } 2710 }
2723 2711
2724 efi_formatp = (xfs_efi_log_format_t *)item->ri_buf[0].i_addr; 2712 efi_formatp = item->ri_buf[0].i_addr;
2725 2713
2726 mp = log->l_mp; 2714 mp = log->l_mp;
2727 efip = xfs_efi_init(mp, efi_formatp->efi_nextents); 2715 efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
@@ -2767,7 +2755,7 @@ xlog_recover_do_efd_trans(
2767 return; 2755 return;
2768 } 2756 }
2769 2757
2770 efd_formatp = (xfs_efd_log_format_t *)item->ri_buf[0].i_addr; 2758 efd_formatp = item->ri_buf[0].i_addr;
2771 ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + 2759 ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) +
2772 ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || 2760 ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) ||
2773 (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) + 2761 (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) +
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 69f62d8b2816..aeb9d72ebf6e 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -25,13 +25,10 @@
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h" 27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 28#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 30#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 31#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 32#include "xfs_dinode.h"
36#include "xfs_inode.h" 33#include "xfs_inode.h"
37#include "xfs_btree.h" 34#include "xfs_btree.h"
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 5761087ee8ea..622da2179a57 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -66,65 +66,6 @@ struct xfs_nameops;
66struct xfs_ail; 66struct xfs_ail;
67struct xfs_quotainfo; 67struct xfs_quotainfo;
68 68
69
70/*
71 * Prototypes and functions for the Data Migration subsystem.
72 */
73
74typedef int (*xfs_send_data_t)(int, struct xfs_inode *,
75 xfs_off_t, size_t, int, int *);
76typedef int (*xfs_send_mmap_t)(struct vm_area_struct *, uint);
77typedef int (*xfs_send_destroy_t)(struct xfs_inode *, dm_right_t);
78typedef int (*xfs_send_namesp_t)(dm_eventtype_t, struct xfs_mount *,
79 struct xfs_inode *, dm_right_t,
80 struct xfs_inode *, dm_right_t,
81 const unsigned char *, const unsigned char *,
82 mode_t, int, int);
83typedef int (*xfs_send_mount_t)(struct xfs_mount *, dm_right_t,
84 char *, char *);
85typedef void (*xfs_send_unmount_t)(struct xfs_mount *, struct xfs_inode *,
86 dm_right_t, mode_t, int, int);
87
88typedef struct xfs_dmops {
89 xfs_send_data_t xfs_send_data;
90 xfs_send_mmap_t xfs_send_mmap;
91 xfs_send_destroy_t xfs_send_destroy;
92 xfs_send_namesp_t xfs_send_namesp;
93 xfs_send_mount_t xfs_send_mount;
94 xfs_send_unmount_t xfs_send_unmount;
95} xfs_dmops_t;
96
97#define XFS_DMAPI_UNMOUNT_FLAGS(mp) \
98 (((mp)->m_dmevmask & (1 << DM_EVENT_UNMOUNT)) ? 0 : DM_FLAGS_UNWANTED)
99
100#define XFS_SEND_DATA(mp, ev,ip,off,len,fl,lock) \
101 (*(mp)->m_dm_ops->xfs_send_data)(ev,ip,off,len,fl,lock)
102#define XFS_SEND_MMAP(mp, vma,fl) \
103 (*(mp)->m_dm_ops->xfs_send_mmap)(vma,fl)
104#define XFS_SEND_DESTROY(mp, ip,right) \
105 (*(mp)->m_dm_ops->xfs_send_destroy)(ip,right)
106#define XFS_SEND_NAMESP(mp, ev,b1,r1,b2,r2,n1,n2,mode,rval,fl) \
107 (*(mp)->m_dm_ops->xfs_send_namesp)(ev,NULL,b1,r1,b2,r2,n1,n2,mode,rval,fl)
108#define XFS_SEND_MOUNT(mp,right,path,name) \
109 (*(mp)->m_dm_ops->xfs_send_mount)(mp,right,path,name)
110#define XFS_SEND_PREUNMOUNT(mp) \
111do { \
112 if (mp->m_flags & XFS_MOUNT_DMAPI) { \
113 (*(mp)->m_dm_ops->xfs_send_namesp)(DM_EVENT_PREUNMOUNT, mp, \
114 (mp)->m_rootip, DM_RIGHT_NULL, \
115 (mp)->m_rootip, DM_RIGHT_NULL, \
116 NULL, NULL, 0, 0, XFS_DMAPI_UNMOUNT_FLAGS(mp)); \
117 } \
118} while (0)
119#define XFS_SEND_UNMOUNT(mp) \
120do { \
121 if (mp->m_flags & XFS_MOUNT_DMAPI) { \
122 (*(mp)->m_dm_ops->xfs_send_unmount)(mp, (mp)->m_rootip, \
123 DM_RIGHT_NULL, 0, 0, XFS_DMAPI_UNMOUNT_FLAGS(mp)); \
124 } \
125} while (0)
126
127
128#ifdef HAVE_PERCPU_SB 69#ifdef HAVE_PERCPU_SB
129 70
130/* 71/*
@@ -241,8 +182,6 @@ typedef struct xfs_mount {
241 uint m_chsize; /* size of next field */ 182 uint m_chsize; /* size of next field */
242 struct xfs_chash *m_chash; /* fs private inode per-cluster 183 struct xfs_chash *m_chash; /* fs private inode per-cluster
243 * hash table */ 184 * hash table */
244 struct xfs_dmops *m_dm_ops; /* vector of DMI ops */
245 struct xfs_qmops *m_qm_ops; /* vector of XQM ops */
246 atomic_t m_active_trans; /* number trans frozen */ 185 atomic_t m_active_trans; /* number trans frozen */
247#ifdef HAVE_PERCPU_SB 186#ifdef HAVE_PERCPU_SB
248 xfs_icsb_cnts_t __percpu *m_sb_cnts; /* per-cpu superblock counters */ 187 xfs_icsb_cnts_t __percpu *m_sb_cnts; /* per-cpu superblock counters */
@@ -269,7 +208,6 @@ typedef struct xfs_mount {
269 must be synchronous except 208 must be synchronous except
270 for space allocations */ 209 for space allocations */
271#define XFS_MOUNT_DELAYLOG (1ULL << 1) /* delayed logging is enabled */ 210#define XFS_MOUNT_DELAYLOG (1ULL << 1) /* delayed logging is enabled */
272#define XFS_MOUNT_DMAPI (1ULL << 2) /* dmapi is enabled */
273#define XFS_MOUNT_WAS_CLEAN (1ULL << 3) 211#define XFS_MOUNT_WAS_CLEAN (1ULL << 3)
274#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem 212#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem
275 operations, typically for 213 operations, typically for
@@ -282,8 +220,6 @@ typedef struct xfs_mount {
282#define XFS_MOUNT_GRPID (1ULL << 9) /* group-ID assigned from directory */ 220#define XFS_MOUNT_GRPID (1ULL << 9) /* group-ID assigned from directory */
283#define XFS_MOUNT_NORECOVERY (1ULL << 10) /* no recovery - dirty fs */ 221#define XFS_MOUNT_NORECOVERY (1ULL << 10) /* no recovery - dirty fs */
284#define XFS_MOUNT_DFLT_IOSIZE (1ULL << 12) /* set default i/o size */ 222#define XFS_MOUNT_DFLT_IOSIZE (1ULL << 12) /* set default i/o size */
285#define XFS_MOUNT_OSYNCISOSYNC (1ULL << 13) /* o_sync is REALLY o_sync */
286 /* osyncisdsync is now default*/
287#define XFS_MOUNT_32BITINODES (1ULL << 14) /* do not create inodes above 223#define XFS_MOUNT_32BITINODES (1ULL << 14) /* do not create inodes above
288 * 32 bits in size */ 224 * 32 bits in size */
289#define XFS_MOUNT_SMALL_INUMS (1ULL << 15) /* users wants 32bit inodes */ 225#define XFS_MOUNT_SMALL_INUMS (1ULL << 15) /* users wants 32bit inodes */
@@ -440,11 +376,6 @@ extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
440 376
441extern int xfs_dev_is_read_only(struct xfs_mount *, char *); 377extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
442 378
443extern int xfs_dmops_get(struct xfs_mount *);
444extern void xfs_dmops_put(struct xfs_mount *);
445
446extern struct xfs_dmops xfs_dmcore_xfs;
447
448#endif /* __KERNEL__ */ 379#endif /* __KERNEL__ */
449 380
450extern void xfs_mod_sb(struct xfs_trans *, __int64_t); 381extern void xfs_mod_sb(struct xfs_trans *, __int64_t);
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index fc1cda23b817..8fca957200df 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -24,12 +24,9 @@
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h" 26#include "xfs_dir2.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_dir2_sf.h"
32#include "xfs_attr_sf.h"
33#include "xfs_dinode.h" 30#include "xfs_dinode.h"
34#include "xfs_inode.h" 31#include "xfs_inode.h"
35#include "xfs_inode_item.h" 32#include "xfs_inode_item.h"
@@ -116,20 +113,7 @@ xfs_rename(
116 int spaceres; 113 int spaceres;
117 int num_inodes; 114 int num_inodes;
118 115
119 xfs_itrace_entry(src_dp); 116 trace_xfs_rename(src_dp, target_dp, src_name, target_name);
120 xfs_itrace_entry(target_dp);
121
122 if (DM_EVENT_ENABLED(src_dp, DM_EVENT_RENAME) ||
123 DM_EVENT_ENABLED(target_dp, DM_EVENT_RENAME)) {
124 error = XFS_SEND_NAMESP(mp, DM_EVENT_RENAME,
125 src_dp, DM_RIGHT_NULL,
126 target_dp, DM_RIGHT_NULL,
127 src_name->name, target_name->name,
128 0, 0, 0);
129 if (error)
130 return error;
131 }
132 /* Return through std_return after this point. */
133 117
134 new_parent = (src_dp != target_dp); 118 new_parent = (src_dp != target_dp);
135 src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR); 119 src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR);
@@ -184,26 +168,14 @@ xfs_rename(
184 /* 168 /*
185 * Join all the inodes to the transaction. From this point on, 169 * Join all the inodes to the transaction. From this point on,
186 * we can rely on either trans_commit or trans_cancel to unlock 170 * we can rely on either trans_commit or trans_cancel to unlock
187 * them. Note that we need to add a vnode reference to the 171 * them.
188 * directories since trans_commit & trans_cancel will decrement
189 * them when they unlock the inodes. Also, we need to be careful
190 * not to add an inode to the transaction more than once.
191 */ 172 */
192 IHOLD(src_dp); 173 xfs_trans_ijoin_ref(tp, src_dp, XFS_ILOCK_EXCL);
193 xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); 174 if (new_parent)
194 175 xfs_trans_ijoin_ref(tp, target_dp, XFS_ILOCK_EXCL);
195 if (new_parent) { 176 xfs_trans_ijoin_ref(tp, src_ip, XFS_ILOCK_EXCL);
196 IHOLD(target_dp); 177 if (target_ip)
197 xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); 178 xfs_trans_ijoin_ref(tp, target_ip, XFS_ILOCK_EXCL);
198 }
199
200 IHOLD(src_ip);
201 xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
202
203 if (target_ip) {
204 IHOLD(target_ip);
205 xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
206 }
207 179
208 /* 180 /*
209 * If we are using project inheritance, we only allow renames 181 * If we are using project inheritance, we only allow renames
@@ -369,26 +341,13 @@ xfs_rename(
369 * trans_commit will unlock src_ip, target_ip & decrement 341 * trans_commit will unlock src_ip, target_ip & decrement
370 * the vnode references. 342 * the vnode references.
371 */ 343 */
372 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 344 return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
373
374 /* Fall through to std_return with error = 0 or errno from
375 * xfs_trans_commit */
376std_return:
377 if (DM_EVENT_ENABLED(src_dp, DM_EVENT_POSTRENAME) ||
378 DM_EVENT_ENABLED(target_dp, DM_EVENT_POSTRENAME)) {
379 (void) XFS_SEND_NAMESP (mp, DM_EVENT_POSTRENAME,
380 src_dp, DM_RIGHT_NULL,
381 target_dp, DM_RIGHT_NULL,
382 src_name->name, target_name->name,
383 0, error, 0);
384 }
385 return error;
386 345
387 abort_return: 346 abort_return:
388 cancel_flags |= XFS_TRANS_ABORT; 347 cancel_flags |= XFS_TRANS_ABORT;
389 /* FALLTHROUGH */
390 error_return: 348 error_return:
391 xfs_bmap_cancel(&free_list); 349 xfs_bmap_cancel(&free_list);
392 xfs_trans_cancel(tp, cancel_flags); 350 xfs_trans_cancel(tp, cancel_flags);
393 goto std_return; 351 std_return:
352 return error;
394} 353}
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index a2d32ce335aa..891260fea11e 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -25,17 +25,10 @@
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h" 27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 28#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 30#include "xfs_dinode.h"
36#include "xfs_inode.h" 31#include "xfs_inode.h"
37#include "xfs_btree.h"
38#include "xfs_ialloc.h"
39#include "xfs_alloc.h" 32#include "xfs_alloc.h"
40#include "xfs_bmap.h" 33#include "xfs_bmap.h"
41#include "xfs_rtalloc.h" 34#include "xfs_rtalloc.h"
@@ -129,7 +122,7 @@ xfs_growfs_rt_alloc(
129 cancelflags |= XFS_TRANS_ABORT; 122 cancelflags |= XFS_TRANS_ABORT;
130 error = xfs_bmapi(tp, ip, oblocks, nblocks - oblocks, 123 error = xfs_bmapi(tp, ip, oblocks, nblocks - oblocks,
131 XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, &firstblock, 124 XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, &firstblock,
132 resblks, &map, &nmap, &flist, NULL); 125 resblks, &map, &nmap, &flist);
133 if (!error && nmap < 1) 126 if (!error && nmap < 1)
134 error = XFS_ERROR(ENOSPC); 127 error = XFS_ERROR(ENOSPC);
135 if (error) 128 if (error)
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index e336742a58a4..56861d5daaef 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -24,27 +24,12 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 29#include "xfs_dinode.h"
36#include "xfs_inode.h" 30#include "xfs_inode.h"
37#include "xfs_inode_item.h"
38#include "xfs_itable.h"
39#include "xfs_btree.h"
40#include "xfs_alloc.h"
41#include "xfs_ialloc.h"
42#include "xfs_attr.h"
43#include "xfs_bmap.h"
44#include "xfs_error.h" 31#include "xfs_error.h"
45#include "xfs_buf_item.h"
46#include "xfs_rw.h" 32#include "xfs_rw.h"
47#include "xfs_trace.h"
48 33
49/* 34/*
50 * Force a shutdown of the filesystem instantly while keeping 35 * Force a shutdown of the filesystem instantly while keeping
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 28547dfce037..fdca7416c754 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * Copyright (C) 2010 Red Hat, Inc.
3 * All Rights Reserved. 4 * All Rights Reserved.
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -24,16 +25,12 @@
24#include "xfs_trans.h" 25#include "xfs_trans.h"
25#include "xfs_sb.h" 26#include "xfs_sb.h"
26#include "xfs_ag.h" 27#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 28#include "xfs_mount.h"
30#include "xfs_error.h" 29#include "xfs_error.h"
31#include "xfs_da_btree.h" 30#include "xfs_da_btree.h"
32#include "xfs_bmap_btree.h" 31#include "xfs_bmap_btree.h"
33#include "xfs_alloc_btree.h" 32#include "xfs_alloc_btree.h"
34#include "xfs_ialloc_btree.h" 33#include "xfs_ialloc_btree.h"
35#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 34#include "xfs_dinode.h"
38#include "xfs_inode.h" 35#include "xfs_inode.h"
39#include "xfs_btree.h" 36#include "xfs_btree.h"
@@ -47,6 +44,7 @@
47#include "xfs_trace.h" 44#include "xfs_trace.h"
48 45
49kmem_zone_t *xfs_trans_zone; 46kmem_zone_t *xfs_trans_zone;
47kmem_zone_t *xfs_log_item_desc_zone;
50 48
51 49
52/* 50/*
@@ -597,8 +595,7 @@ _xfs_trans_alloc(
597 tp->t_magic = XFS_TRANS_MAGIC; 595 tp->t_magic = XFS_TRANS_MAGIC;
598 tp->t_type = type; 596 tp->t_type = type;
599 tp->t_mountp = mp; 597 tp->t_mountp = mp;
600 tp->t_items_free = XFS_LIC_NUM_SLOTS; 598 INIT_LIST_HEAD(&tp->t_items);
601 xfs_lic_init(&(tp->t_items));
602 INIT_LIST_HEAD(&tp->t_busy); 599 INIT_LIST_HEAD(&tp->t_busy);
603 return tp; 600 return tp;
604} 601}
@@ -643,8 +640,7 @@ xfs_trans_dup(
643 ntp->t_magic = XFS_TRANS_MAGIC; 640 ntp->t_magic = XFS_TRANS_MAGIC;
644 ntp->t_type = tp->t_type; 641 ntp->t_type = tp->t_type;
645 ntp->t_mountp = tp->t_mountp; 642 ntp->t_mountp = tp->t_mountp;
646 ntp->t_items_free = XFS_LIC_NUM_SLOTS; 643 INIT_LIST_HEAD(&ntp->t_items);
647 xfs_lic_init(&(ntp->t_items));
648 INIT_LIST_HEAD(&ntp->t_busy); 644 INIT_LIST_HEAD(&ntp->t_busy);
649 645
650 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); 646 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
@@ -1124,6 +1120,108 @@ xfs_trans_unreserve_and_mod_sb(
1124} 1120}
1125 1121
1126/* 1122/*
1123 * Add the given log item to the transaction's list of log items.
1124 *
1125 * The log item will now point to its new descriptor with its li_desc field.
1126 */
1127void /* attach a log item to a transaction via a freshly allocated descriptor */
1128xfs_trans_add_item(
1129 struct xfs_trans *tp, /* transaction whose t_items list receives the descriptor */
1130 struct xfs_log_item *lip) /* log item being attached; its li_desc is set on return */
1131{
1132 struct xfs_log_item_desc *lidp;
1133
1134 ASSERT(lip->li_mountp == tp->t_mountp); /* compare: a single '=' would overwrite li_mountp */
1135 ASSERT(lip->li_ailp == tp->t_mountp->m_ail); /* compare: a single '=' would overwrite li_ailp */
1136
1137 lidp = kmem_zone_zalloc(xfs_log_item_desc_zone, KM_SLEEP | KM_NOFS); /* used unchecked; presumably KM_SLEEP cannot return NULL - confirm */
1138
1139 lidp->lid_item = lip;
1140 lidp->lid_flags = 0; /* starts out not dirty (XFS_LID_DIRTY clear) */
1141 lidp->lid_size = 0;
1142 list_add_tail(&lidp->lid_trans, &tp->t_items); /* append at the tail of the transaction's item list */
1143
1144 lip->li_desc = lidp; /* back-pointer from the item to its descriptor */
1145}
1146
1147STATIC void /* unlink one descriptor from its list and return it to the descriptor zone */
1148xfs_trans_free_item_desc(
1149 struct xfs_log_item_desc *lidp) /* descriptor to release; must not be used afterwards */
1150{
1151 list_del_init(&lidp->lid_trans); /* take it off whatever list lid_trans is linked into */
1152 kmem_zone_free(xfs_log_item_desc_zone, lidp); /* hand the memory back to xfs_log_item_desc_zone */
1153}
1154
1155/*
1156 * Unlink and free the descriptor attached to the given log item, then clear the item's li_desc pointer.
1157 */
1158void
1159xfs_trans_del_item(
1160 struct xfs_log_item *lip) /* item whose li_desc is released; NOTE(review): li_desc is passed on without a NULL check */
1161{
1162 xfs_trans_free_item_desc(lip->li_desc);
1163 lip->li_desc = NULL; /* the descriptor is gone; do not leave a dangling pointer */
1164}
1165
1166/*
1167 * Unlock all of the items of a transaction and free all the descriptors
1168 * of that transaction.
1169 */
1170STATIC void
1171xfs_trans_free_items(
1172 struct xfs_trans *tp, /* transaction whose t_items list is walked */
1173 xfs_lsn_t commit_lsn, /* passed to IOP_COMMITTING unless it is NULLCOMMITLSN */
1174 int flags) /* XFS_TRANS_ABORT here marks every item XFS_LI_ABORTED */
1175{
1176 struct xfs_log_item_desc *lidp, *next;
1177
1178 list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) { /* _safe variant: each entry is freed inside the loop */
1179 struct xfs_log_item *lip = lidp->lid_item;
1180
1181 lip->li_desc = NULL; /* detach the item before its callbacks run */
1182
1183 if (commit_lsn != NULLCOMMITLSN)
1184 IOP_COMMITTING(lip, commit_lsn);
1185 if (flags & XFS_TRANS_ABORT)
1186 lip->li_flags |= XFS_LI_ABORTED; /* set before IOP_UNLOCK is called */
1187 IOP_UNLOCK(lip);
1188
1189 xfs_trans_free_item_desc(lidp); /* every descriptor is freed, dirty or not */
1190 }
1191}
1192
1193/*
1194 * Unlock the items associated with a transaction.
1195 *
1196 * Items which were not logged should be freed. Those which were logged must
1197 * still be tracked so they can be unpinned when the transaction commits.
1198 */
1199STATIC void
1200xfs_trans_unlock_items(
1201 struct xfs_trans *tp, /* transaction whose t_items list is walked */
1202 xfs_lsn_t commit_lsn) /* passed to IOP_COMMITTING unless it is NULLCOMMITLSN */
1203{
1204 struct xfs_log_item_desc *lidp, *next;
1205
1206 list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) { /* _safe variant: clean entries are freed inside the loop */
1207 struct xfs_log_item *lip = lidp->lid_item;
1208
1209 lip->li_desc = NULL; /* NOTE(review): cleared even when the descriptor is kept below - confirm intended */
1210
1211 if (commit_lsn != NULLCOMMITLSN)
1212 IOP_COMMITTING(lip, commit_lsn);
1213 IOP_UNLOCK(lip);
1214
1215 /*
1216 * Free the descriptor if the item is not dirty
1217 * within this transaction.
1218 */
1219 if (!(lidp->lid_flags & XFS_LID_DIRTY))
1220 xfs_trans_free_item_desc(lidp); /* dirty descriptors stay linked on tp->t_items */
1221 }
1222}
1223
1224/*
1127 * Total up the number of log iovecs needed to commit this 1225 * Total up the number of log iovecs needed to commit this
1128 * transaction. The transaction itself needs one for the 1226 * transaction. The transaction itself needs one for the
1129 * transaction header. Ask each dirty item in turn how many 1227 * transaction header. Ask each dirty item in turn how many
@@ -1134,30 +1232,27 @@ xfs_trans_count_vecs(
1134 struct xfs_trans *tp) 1232 struct xfs_trans *tp)
1135{ 1233{
1136 int nvecs; 1234 int nvecs;
1137 xfs_log_item_desc_t *lidp; 1235 struct xfs_log_item_desc *lidp;
1138 1236
1139 nvecs = 1; 1237 nvecs = 1;
1140 lidp = xfs_trans_first_item(tp);
1141 ASSERT(lidp != NULL);
1142 1238
1143 /* In the non-debug case we need to start bailing out if we 1239 /* In the non-debug case we need to start bailing out if we
1144 * didn't find a log_item here, return zero and let trans_commit 1240 * didn't find a log_item here, return zero and let trans_commit
1145 * deal with it. 1241 * deal with it.
1146 */ 1242 */
1147 if (lidp == NULL) 1243 if (list_empty(&tp->t_items)) {
1244 ASSERT(0);
1148 return 0; 1245 return 0;
1246 }
1149 1247
1150 while (lidp != NULL) { 1248 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
1151 /* 1249 /*
1152 * Skip items which aren't dirty in this transaction. 1250 * Skip items which aren't dirty in this transaction.
1153 */ 1251 */
1154 if (!(lidp->lid_flags & XFS_LID_DIRTY)) { 1252 if (!(lidp->lid_flags & XFS_LID_DIRTY))
1155 lidp = xfs_trans_next_item(tp, lidp);
1156 continue; 1253 continue;
1157 }
1158 lidp->lid_size = IOP_SIZE(lidp->lid_item); 1254 lidp->lid_size = IOP_SIZE(lidp->lid_item);
1159 nvecs += lidp->lid_size; 1255 nvecs += lidp->lid_size;
1160 lidp = xfs_trans_next_item(tp, lidp);
1161 } 1256 }
1162 1257
1163 return nvecs; 1258 return nvecs;
@@ -1177,7 +1272,7 @@ xfs_trans_fill_vecs(
1177 struct xfs_trans *tp, 1272 struct xfs_trans *tp,
1178 struct xfs_log_iovec *log_vector) 1273 struct xfs_log_iovec *log_vector)
1179{ 1274{
1180 xfs_log_item_desc_t *lidp; 1275 struct xfs_log_item_desc *lidp;
1181 struct xfs_log_iovec *vecp; 1276 struct xfs_log_iovec *vecp;
1182 uint nitems; 1277 uint nitems;
1183 1278
@@ -1188,14 +1283,11 @@ xfs_trans_fill_vecs(
1188 vecp = log_vector + 1; 1283 vecp = log_vector + 1;
1189 1284
1190 nitems = 0; 1285 nitems = 0;
1191 lidp = xfs_trans_first_item(tp); 1286 ASSERT(!list_empty(&tp->t_items));
1192 ASSERT(lidp); 1287 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
1193 while (lidp) {
1194 /* Skip items which aren't dirty in this transaction. */ 1288 /* Skip items which aren't dirty in this transaction. */
1195 if (!(lidp->lid_flags & XFS_LID_DIRTY)) { 1289 if (!(lidp->lid_flags & XFS_LID_DIRTY))
1196 lidp = xfs_trans_next_item(tp, lidp);
1197 continue; 1290 continue;
1198 }
1199 1291
1200 /* 1292 /*
1201 * The item may be marked dirty but not log anything. This can 1293 * The item may be marked dirty but not log anything. This can
@@ -1206,7 +1298,6 @@ xfs_trans_fill_vecs(
1206 IOP_FORMAT(lidp->lid_item, vecp); 1298 IOP_FORMAT(lidp->lid_item, vecp);
1207 vecp += lidp->lid_size; 1299 vecp += lidp->lid_size;
1208 IOP_PIN(lidp->lid_item); 1300 IOP_PIN(lidp->lid_item);
1209 lidp = xfs_trans_next_item(tp, lidp);
1210 } 1301 }
1211 1302
1212 /* 1303 /*
@@ -1284,7 +1375,7 @@ xfs_trans_item_committed(
1284 * log item flags, if anyone else stales the buffer we do not want to 1375 * log item flags, if anyone else stales the buffer we do not want to
1285 * pay any attention to it. 1376 * pay any attention to it.
1286 */ 1377 */
1287 IOP_UNPIN(lip); 1378 IOP_UNPIN(lip, 0);
1288} 1379}
1289 1380
1290/* 1381/*
@@ -1301,24 +1392,15 @@ xfs_trans_committed(
1301 struct xfs_trans *tp, 1392 struct xfs_trans *tp,
1302 int abortflag) 1393 int abortflag)
1303{ 1394{
1304 xfs_log_item_desc_t *lidp; 1395 struct xfs_log_item_desc *lidp, *next;
1305 xfs_log_item_chunk_t *licp;
1306 xfs_log_item_chunk_t *next_licp;
1307 1396
1308 /* Call the transaction's completion callback if there is one. */ 1397 /* Call the transaction's completion callback if there is one. */
1309 if (tp->t_callback != NULL) 1398 if (tp->t_callback != NULL)
1310 tp->t_callback(tp, tp->t_callarg); 1399 tp->t_callback(tp, tp->t_callarg);
1311 1400
1312 for (lidp = xfs_trans_first_item(tp); 1401 list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) {
1313 lidp != NULL;
1314 lidp = xfs_trans_next_item(tp, lidp)) {
1315 xfs_trans_item_committed(lidp->lid_item, tp->t_lsn, abortflag); 1402 xfs_trans_item_committed(lidp->lid_item, tp->t_lsn, abortflag);
1316 } 1403 xfs_trans_free_item_desc(lidp);
1317
1318 /* free the item chunks, ignoring the embedded chunk */
1319 for (licp = tp->t_items.lic_next; licp != NULL; licp = next_licp) {
1320 next_licp = licp->lic_next;
1321 kmem_free(licp);
1322 } 1404 }
1323 1405
1324 xfs_trans_free(tp); 1406 xfs_trans_free(tp);
@@ -1333,16 +1415,14 @@ xfs_trans_uncommit(
1333 struct xfs_trans *tp, 1415 struct xfs_trans *tp,
1334 uint flags) 1416 uint flags)
1335{ 1417{
1336 xfs_log_item_desc_t *lidp; 1418 struct xfs_log_item_desc *lidp;
1337 1419
1338 for (lidp = xfs_trans_first_item(tp); 1420 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
1339 lidp != NULL;
1340 lidp = xfs_trans_next_item(tp, lidp)) {
1341 /* 1421 /*
1342 * Unpin all but those that aren't dirty. 1422 * Unpin all but those that aren't dirty.
1343 */ 1423 */
1344 if (lidp->lid_flags & XFS_LID_DIRTY) 1424 if (lidp->lid_flags & XFS_LID_DIRTY)
1345 IOP_UNPIN_REMOVE(lidp->lid_item, tp); 1425 IOP_UNPIN(lidp->lid_item, 1);
1346 } 1426 }
1347 1427
1348 xfs_trans_unreserve_and_mod_sb(tp); 1428 xfs_trans_unreserve_and_mod_sb(tp);
@@ -1508,33 +1588,28 @@ STATIC struct xfs_log_vec *
1508xfs_trans_alloc_log_vecs( 1588xfs_trans_alloc_log_vecs(
1509 xfs_trans_t *tp) 1589 xfs_trans_t *tp)
1510{ 1590{
1511 xfs_log_item_desc_t *lidp; 1591 struct xfs_log_item_desc *lidp;
1512 struct xfs_log_vec *lv = NULL; 1592 struct xfs_log_vec *lv = NULL;
1513 struct xfs_log_vec *ret_lv = NULL; 1593 struct xfs_log_vec *ret_lv = NULL;
1514 1594
1515 lidp = xfs_trans_first_item(tp);
1516 1595
1517 /* Bail out if we didn't find a log item. */ 1596 /* Bail out if we didn't find a log item. */
1518 if (!lidp) { 1597 if (list_empty(&tp->t_items)) {
1519 ASSERT(0); 1598 ASSERT(0);
1520 return NULL; 1599 return NULL;
1521 } 1600 }
1522 1601
1523 while (lidp != NULL) { 1602 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
1524 struct xfs_log_vec *new_lv; 1603 struct xfs_log_vec *new_lv;
1525 1604
1526 /* Skip items which aren't dirty in this transaction. */ 1605 /* Skip items which aren't dirty in this transaction. */
1527 if (!(lidp->lid_flags & XFS_LID_DIRTY)) { 1606 if (!(lidp->lid_flags & XFS_LID_DIRTY))
1528 lidp = xfs_trans_next_item(tp, lidp);
1529 continue; 1607 continue;
1530 }
1531 1608
1532 /* Skip items that do not have any vectors for writing */ 1609 /* Skip items that do not have any vectors for writing */
1533 lidp->lid_size = IOP_SIZE(lidp->lid_item); 1610 lidp->lid_size = IOP_SIZE(lidp->lid_item);
1534 if (!lidp->lid_size) { 1611 if (!lidp->lid_size)
1535 lidp = xfs_trans_next_item(tp, lidp);
1536 continue; 1612 continue;
1537 }
1538 1613
1539 new_lv = kmem_zalloc(sizeof(*new_lv) + 1614 new_lv = kmem_zalloc(sizeof(*new_lv) +
1540 lidp->lid_size * sizeof(struct xfs_log_iovec), 1615 lidp->lid_size * sizeof(struct xfs_log_iovec),
@@ -1549,7 +1624,6 @@ xfs_trans_alloc_log_vecs(
1549 else 1624 else
1550 lv->lv_next = new_lv; 1625 lv->lv_next = new_lv;
1551 lv = new_lv; 1626 lv = new_lv;
1552 lidp = xfs_trans_next_item(tp, lidp);
1553 } 1627 }
1554 1628
1555 return ret_lv; 1629 return ret_lv;
@@ -1708,12 +1782,6 @@ xfs_trans_cancel(
1708 int flags) 1782 int flags)
1709{ 1783{
1710 int log_flags; 1784 int log_flags;
1711#ifdef DEBUG
1712 xfs_log_item_chunk_t *licp;
1713 xfs_log_item_desc_t *lidp;
1714 xfs_log_item_t *lip;
1715 int i;
1716#endif
1717 xfs_mount_t *mp = tp->t_mountp; 1785 xfs_mount_t *mp = tp->t_mountp;
1718 1786
1719 /* 1787 /*
@@ -1732,21 +1800,11 @@ xfs_trans_cancel(
1732 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 1800 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1733 } 1801 }
1734#ifdef DEBUG 1802#ifdef DEBUG
1735 if (!(flags & XFS_TRANS_ABORT)) { 1803 if (!(flags & XFS_TRANS_ABORT) && !XFS_FORCED_SHUTDOWN(mp)) {
1736 licp = &(tp->t_items); 1804 struct xfs_log_item_desc *lidp;
1737 while (licp != NULL) { 1805
1738 lidp = licp->lic_descs; 1806 list_for_each_entry(lidp, &tp->t_items, lid_trans)
1739 for (i = 0; i < licp->lic_unused; i++, lidp++) { 1807 ASSERT(!(lidp->lid_item->li_type == XFS_LI_EFD));
1740 if (xfs_lic_isfree(licp, i)) {
1741 continue;
1742 }
1743
1744 lip = lidp->lid_item;
1745 if (!XFS_FORCED_SHUTDOWN(mp))
1746 ASSERT(!(lip->li_type == XFS_LI_EFD));
1747 }
1748 licp = licp->lic_next;
1749 }
1750 } 1808 }
1751#endif 1809#endif
1752 xfs_trans_unreserve_and_mod_sb(tp); 1810 xfs_trans_unreserve_and_mod_sb(tp);
@@ -1834,7 +1892,6 @@ xfs_trans_roll(
1834 if (error) 1892 if (error)
1835 return error; 1893 return error;
1836 1894
1837 xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL); 1895 xfs_trans_ijoin(trans, dp);
1838 xfs_trans_ihold(trans, dp);
1839 return 0; 1896 return 0;
1840} 1897}
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index e639e8e9a2a9..c13c0f97b494 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -161,105 +161,14 @@ typedef struct xfs_trans_header {
161 * the amount of space needed to log the item it describes 161 * the amount of space needed to log the item it describes
162 * once we get to commit processing (see xfs_trans_commit()). 162 * once we get to commit processing (see xfs_trans_commit()).
163 */ 163 */
164typedef struct xfs_log_item_desc { 164struct xfs_log_item_desc {
165 struct xfs_log_item *lid_item; 165 struct xfs_log_item *lid_item;
166 ushort lid_size; 166 ushort lid_size;
167 unsigned char lid_flags; 167 unsigned char lid_flags;
168 unsigned char lid_index; 168 struct list_head lid_trans;
169} xfs_log_item_desc_t; 169};
170 170
171#define XFS_LID_DIRTY 0x1 171#define XFS_LID_DIRTY 0x1
172#define XFS_LID_PINNED 0x2
173
174/*
175 * This structure is used to maintain a chunk list of log_item_desc
176 * structures. The free field is a bitmask indicating which descriptors
177 * in this chunk's array are free. The unused field is the first value
178 * not used since this chunk was allocated.
179 */
180#define XFS_LIC_NUM_SLOTS 15
181typedef struct xfs_log_item_chunk {
182 struct xfs_log_item_chunk *lic_next;
183 ushort lic_free;
184 ushort lic_unused;
185 xfs_log_item_desc_t lic_descs[XFS_LIC_NUM_SLOTS];
186} xfs_log_item_chunk_t;
187
188#define XFS_LIC_MAX_SLOT (XFS_LIC_NUM_SLOTS - 1)
189#define XFS_LIC_FREEMASK ((1 << XFS_LIC_NUM_SLOTS) - 1)
190
191
192/*
193 * Initialize the given chunk. Set the chunk's free descriptor mask
194 * to indicate that all descriptors are free. The caller gets to set
195 * lic_unused to the right value (0 matches all free). The
196 * lic_descs.lid_index values are set up as each desc is allocated.
197 */
198static inline void xfs_lic_init(xfs_log_item_chunk_t *cp)
199{
200 cp->lic_free = XFS_LIC_FREEMASK;
201}
202
203static inline void xfs_lic_init_slot(xfs_log_item_chunk_t *cp, int slot)
204{
205 cp->lic_descs[slot].lid_index = (unsigned char)(slot);
206}
207
208static inline int xfs_lic_vacancy(xfs_log_item_chunk_t *cp)
209{
210 return cp->lic_free & XFS_LIC_FREEMASK;
211}
212
213static inline void xfs_lic_all_free(xfs_log_item_chunk_t *cp)
214{
215 cp->lic_free = XFS_LIC_FREEMASK;
216}
217
218static inline int xfs_lic_are_all_free(xfs_log_item_chunk_t *cp)
219{
220 return ((cp->lic_free & XFS_LIC_FREEMASK) == XFS_LIC_FREEMASK);
221}
222
223static inline int xfs_lic_isfree(xfs_log_item_chunk_t *cp, int slot)
224{
225 return (cp->lic_free & (1 << slot));
226}
227
228static inline void xfs_lic_claim(xfs_log_item_chunk_t *cp, int slot)
229{
230 cp->lic_free &= ~(1 << slot);
231}
232
233static inline void xfs_lic_relse(xfs_log_item_chunk_t *cp, int slot)
234{
235 cp->lic_free |= 1 << slot;
236}
237
238static inline xfs_log_item_desc_t *
239xfs_lic_slot(xfs_log_item_chunk_t *cp, int slot)
240{
241 return &(cp->lic_descs[slot]);
242}
243
244static inline int xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp)
245{
246 return (uint)dp->lid_index;
247}
248
249/*
250 * Calculate the address of a chunk given a descriptor pointer:
251 * dp - dp->lid_index give the address of the start of the lic_descs array.
252 * From this we subtract the offset of the lic_descs field in a chunk.
253 * All of this yields the address of the chunk, which is
254 * cast to a chunk pointer.
255 */
256static inline xfs_log_item_chunk_t *
257xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
258{
259 return (xfs_log_item_chunk_t*) \
260 (((xfs_caddr_t)((dp) - (dp)->lid_index)) - \
261 (xfs_caddr_t)(((xfs_log_item_chunk_t*)0)->lic_descs));
262}
263 172
264#define XFS_TRANS_MAGIC 0x5452414E /* 'TRAN' */ 173#define XFS_TRANS_MAGIC 0x5452414E /* 'TRAN' */
265/* 174/*
@@ -275,8 +184,6 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
275/* 184/*
276 * Values for call flags parameter. 185 * Values for call flags parameter.
277 */ 186 */
278#define XFS_TRANS_NOSLEEP 0x1
279#define XFS_TRANS_WAIT 0x2
280#define XFS_TRANS_RELEASE_LOG_RES 0x4 187#define XFS_TRANS_RELEASE_LOG_RES 0x4
281#define XFS_TRANS_ABORT 0x8 188#define XFS_TRANS_ABORT 0x8
282 189
@@ -438,8 +345,7 @@ typedef struct xfs_item_ops {
438 uint (*iop_size)(xfs_log_item_t *); 345 uint (*iop_size)(xfs_log_item_t *);
439 void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *); 346 void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
440 void (*iop_pin)(xfs_log_item_t *); 347 void (*iop_pin)(xfs_log_item_t *);
441 void (*iop_unpin)(xfs_log_item_t *); 348 void (*iop_unpin)(xfs_log_item_t *, int remove);
442 void (*iop_unpin_remove)(xfs_log_item_t *, struct xfs_trans *);
443 uint (*iop_trylock)(xfs_log_item_t *); 349 uint (*iop_trylock)(xfs_log_item_t *);
444 void (*iop_unlock)(xfs_log_item_t *); 350 void (*iop_unlock)(xfs_log_item_t *);
445 xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); 351 xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
@@ -451,8 +357,7 @@ typedef struct xfs_item_ops {
451#define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip) 357#define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip)
452#define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp) 358#define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp)
453#define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip) 359#define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip)
454#define IOP_UNPIN(ip) (*(ip)->li_ops->iop_unpin)(ip) 360#define IOP_UNPIN(ip, remove) (*(ip)->li_ops->iop_unpin)(ip, remove)
455#define IOP_UNPIN_REMOVE(ip,tp) (*(ip)->li_ops->iop_unpin_remove)(ip, tp)
456#define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip) 361#define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip)
457#define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip) 362#define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip)
458#define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn) 363#define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn)
@@ -516,8 +421,7 @@ typedef struct xfs_trans {
516 int64_t t_rblocks_delta;/* superblock rblocks change */ 421 int64_t t_rblocks_delta;/* superblock rblocks change */
517 int64_t t_rextents_delta;/* superblocks rextents chg */ 422 int64_t t_rextents_delta;/* superblocks rextents chg */
518 int64_t t_rextslog_delta;/* superblocks rextslog chg */ 423 int64_t t_rextslog_delta;/* superblocks rextslog chg */
519 unsigned int t_items_free; /* log item descs free */ 424 struct list_head t_items; /* log item descriptors */
520 xfs_log_item_chunk_t t_items; /* first log item desc chunk */
521 xfs_trans_header_t t_header; /* header for in-log trans */ 425 xfs_trans_header_t t_header; /* header for in-log trans */
522 struct list_head t_busy; /* list of busy extents */ 426 struct list_head t_busy; /* list of busy extents */
523 unsigned long t_pflags; /* saved process flags state */ 427 unsigned long t_pflags; /* saved process flags state */
@@ -569,8 +473,8 @@ void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
569void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); 473void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
570int xfs_trans_iget(struct xfs_mount *, xfs_trans_t *, 474int xfs_trans_iget(struct xfs_mount *, xfs_trans_t *,
571 xfs_ino_t , uint, uint, struct xfs_inode **); 475 xfs_ino_t , uint, uint, struct xfs_inode **);
572void xfs_trans_ijoin(xfs_trans_t *, struct xfs_inode *, uint); 476void xfs_trans_ijoin_ref(struct xfs_trans *, struct xfs_inode *, uint);
573void xfs_trans_ihold(xfs_trans_t *, struct xfs_inode *); 477void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *);
574void xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint); 478void xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint);
575void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint); 479void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint);
576struct xfs_efi_log_item *xfs_trans_get_efi(xfs_trans_t *, uint); 480struct xfs_efi_log_item *xfs_trans_get_efi(xfs_trans_t *, uint);
@@ -595,6 +499,7 @@ int xfs_trans_ail_init(struct xfs_mount *);
595void xfs_trans_ail_destroy(struct xfs_mount *); 499void xfs_trans_ail_destroy(struct xfs_mount *);
596 500
597extern kmem_zone_t *xfs_trans_zone; 501extern kmem_zone_t *xfs_trans_zone;
502extern kmem_zone_t *xfs_log_item_desc_zone;
598 503
599#endif /* __KERNEL__ */ 504#endif /* __KERNEL__ */
600 505
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index e799824f7245..dc9069568ff7 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -24,7 +24,6 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_trans_priv.h" 28#include "xfs_trans_priv.h"
30#include "xfs_error.h" 29#include "xfs_error.h"
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 63d81a22f4fd..90af025e6839 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -24,14 +24,10 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_buf_item.h" 33#include "xfs_buf_item.h"
@@ -51,36 +47,17 @@ xfs_trans_buf_item_match(
51 xfs_daddr_t blkno, 47 xfs_daddr_t blkno,
52 int len) 48 int len)
53{ 49{
54 xfs_log_item_chunk_t *licp; 50 struct xfs_log_item_desc *lidp;
55 xfs_log_item_desc_t *lidp; 51 struct xfs_buf_log_item *blip;
56 xfs_buf_log_item_t *blip;
57 int i;
58 52
59 len = BBTOB(len); 53 len = BBTOB(len);
60 for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) { 54 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
61 if (xfs_lic_are_all_free(licp)) { 55 blip = (struct xfs_buf_log_item *)lidp->lid_item;
62 ASSERT(licp == &tp->t_items); 56 if (blip->bli_item.li_type == XFS_LI_BUF &&
63 ASSERT(licp->lic_next == NULL); 57 XFS_BUF_TARGET(blip->bli_buf) == target &&
64 return NULL; 58 XFS_BUF_ADDR(blip->bli_buf) == blkno &&
65 } 59 XFS_BUF_COUNT(blip->bli_buf) == len)
66 60 return blip->bli_buf;
67 for (i = 0; i < licp->lic_unused; i++) {
68 /*
69 * Skip unoccupied slots.
70 */
71 if (xfs_lic_isfree(licp, i))
72 continue;
73
74 lidp = xfs_lic_slot(licp, i);
75 blip = (xfs_buf_log_item_t *)lidp->lid_item;
76 if (blip->bli_item.li_type != XFS_LI_BUF)
77 continue;
78
79 if (XFS_BUF_TARGET(blip->bli_buf) == target &&
80 XFS_BUF_ADDR(blip->bli_buf) == blkno &&
81 XFS_BUF_COUNT(blip->bli_buf) == len)
82 return blip->bli_buf;
83 }
84 } 61 }
85 62
86 return NULL; 63 return NULL;
@@ -127,7 +104,7 @@ _xfs_trans_bjoin(
127 /* 104 /*
128 * Get a log_item_desc to point at the new item. 105 * Get a log_item_desc to point at the new item.
129 */ 106 */
130 (void) xfs_trans_add_item(tp, (xfs_log_item_t *)bip); 107 xfs_trans_add_item(tp, &bip->bli_item);
131 108
132 /* 109 /*
133 * Initialize b_fsprivate2 so we can find it with incore_match() 110 * Initialize b_fsprivate2 so we can find it with incore_match()
@@ -483,7 +460,6 @@ xfs_trans_brelse(xfs_trans_t *tp,
483{ 460{
484 xfs_buf_log_item_t *bip; 461 xfs_buf_log_item_t *bip;
485 xfs_log_item_t *lip; 462 xfs_log_item_t *lip;
486 xfs_log_item_desc_t *lidp;
487 463
488 /* 464 /*
489 * Default to a normal brelse() call if the tp is NULL. 465 * Default to a normal brelse() call if the tp is NULL.
@@ -514,13 +490,6 @@ xfs_trans_brelse(xfs_trans_t *tp,
514 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); 490 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
515 ASSERT(atomic_read(&bip->bli_refcount) > 0); 491 ASSERT(atomic_read(&bip->bli_refcount) > 0);
516 492
517 /*
518 * Find the item descriptor pointing to this buffer's
519 * log item. It must be there.
520 */
521 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip);
522 ASSERT(lidp != NULL);
523
524 trace_xfs_trans_brelse(bip); 493 trace_xfs_trans_brelse(bip);
525 494
526 /* 495 /*
@@ -536,7 +505,7 @@ xfs_trans_brelse(xfs_trans_t *tp,
536 * If the buffer is dirty within this transaction, we can't 505 * If the buffer is dirty within this transaction, we can't
537 * release it until we commit. 506 * release it until we commit.
538 */ 507 */
539 if (lidp->lid_flags & XFS_LID_DIRTY) 508 if (bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY)
540 return; 509 return;
541 510
542 /* 511 /*
@@ -553,7 +522,7 @@ xfs_trans_brelse(xfs_trans_t *tp,
553 /* 522 /*
554 * Free up the log item descriptor tracking the released item. 523 * Free up the log item descriptor tracking the released item.
555 */ 524 */
556 xfs_trans_free_item(tp, lidp); 525 xfs_trans_del_item(&bip->bli_item);
557 526
558 /* 527 /*
559 * Clear the hold flag in the buf log item if it is set. 528 * Clear the hold flag in the buf log item if it is set.
@@ -665,7 +634,6 @@ xfs_trans_log_buf(xfs_trans_t *tp,
665 uint last) 634 uint last)
666{ 635{
667 xfs_buf_log_item_t *bip; 636 xfs_buf_log_item_t *bip;
668 xfs_log_item_desc_t *lidp;
669 637
670 ASSERT(XFS_BUF_ISBUSY(bp)); 638 ASSERT(XFS_BUF_ISBUSY(bp));
671 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 639 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
@@ -690,7 +658,7 @@ xfs_trans_log_buf(xfs_trans_t *tp,
690 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); 658 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
691 ASSERT(atomic_read(&bip->bli_refcount) > 0); 659 ASSERT(atomic_read(&bip->bli_refcount) > 0);
692 XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); 660 XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks);
693 bip->bli_item.li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*))xfs_buf_iodone; 661 bip->bli_item.li_cb = xfs_buf_iodone;
694 662
695 trace_xfs_trans_log_buf(bip); 663 trace_xfs_trans_log_buf(bip);
696 664
@@ -707,11 +675,8 @@ xfs_trans_log_buf(xfs_trans_t *tp,
707 bip->bli_format.blf_flags &= ~XFS_BLF_CANCEL; 675 bip->bli_format.blf_flags &= ~XFS_BLF_CANCEL;
708 } 676 }
709 677
710 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip);
711 ASSERT(lidp != NULL);
712
713 tp->t_flags |= XFS_TRANS_DIRTY; 678 tp->t_flags |= XFS_TRANS_DIRTY;
714 lidp->lid_flags |= XFS_LID_DIRTY; 679 bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
715 bip->bli_flags |= XFS_BLI_LOGGED; 680 bip->bli_flags |= XFS_BLI_LOGGED;
716 xfs_buf_item_log(bip, first, last); 681 xfs_buf_item_log(bip, first, last);
717} 682}
@@ -740,7 +705,6 @@ xfs_trans_binval(
740 xfs_trans_t *tp, 705 xfs_trans_t *tp,
741 xfs_buf_t *bp) 706 xfs_buf_t *bp)
742{ 707{
743 xfs_log_item_desc_t *lidp;
744 xfs_buf_log_item_t *bip; 708 xfs_buf_log_item_t *bip;
745 709
746 ASSERT(XFS_BUF_ISBUSY(bp)); 710 ASSERT(XFS_BUF_ISBUSY(bp));
@@ -748,8 +712,6 @@ xfs_trans_binval(
748 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 712 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
749 713
750 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); 714 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
751 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip);
752 ASSERT(lidp != NULL);
753 ASSERT(atomic_read(&bip->bli_refcount) > 0); 715 ASSERT(atomic_read(&bip->bli_refcount) > 0);
754 716
755 trace_xfs_trans_binval(bip); 717 trace_xfs_trans_binval(bip);
@@ -764,7 +726,7 @@ xfs_trans_binval(
764 ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY))); 726 ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY)));
765 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_INODE_BUF)); 727 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_INODE_BUF));
766 ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); 728 ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
767 ASSERT(lidp->lid_flags & XFS_LID_DIRTY); 729 ASSERT(bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY);
768 ASSERT(tp->t_flags & XFS_TRANS_DIRTY); 730 ASSERT(tp->t_flags & XFS_TRANS_DIRTY);
769 return; 731 return;
770 } 732 }
@@ -797,7 +759,7 @@ xfs_trans_binval(
797 bip->bli_format.blf_flags |= XFS_BLF_CANCEL; 759 bip->bli_format.blf_flags |= XFS_BLF_CANCEL;
798 memset((char *)(bip->bli_format.blf_data_map), 0, 760 memset((char *)(bip->bli_format.blf_data_map), 0,
799 (bip->bli_format.blf_map_size * sizeof(uint))); 761 (bip->bli_format.blf_map_size * sizeof(uint)));
800 lidp->lid_flags |= XFS_LID_DIRTY; 762 bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
801 tp->t_flags |= XFS_TRANS_DIRTY; 763 tp->t_flags |= XFS_TRANS_DIRTY;
802} 764}
803 765
@@ -853,12 +815,9 @@ xfs_trans_stale_inode_buf(
853 ASSERT(atomic_read(&bip->bli_refcount) > 0); 815 ASSERT(atomic_read(&bip->bli_refcount) > 0);
854 816
855 bip->bli_flags |= XFS_BLI_STALE_INODE; 817 bip->bli_flags |= XFS_BLI_STALE_INODE;
856 bip->bli_item.li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) 818 bip->bli_item.li_cb = xfs_buf_iodone;
857 xfs_buf_iodone;
858} 819}
859 820
860
861
862/* 821/*
863 * Mark the buffer as being one which contains newly allocated 822 * Mark the buffer as being one which contains newly allocated
864 * inodes. We need to make sure that even if this buffer is 823 * inodes. We need to make sure that even if this buffer is
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c
index 27cce2a9c7e9..f783d5e9fa70 100644
--- a/fs/xfs/xfs_trans_extfree.c
+++ b/fs/xfs/xfs_trans_extfree.c
@@ -23,7 +23,6 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dmapi.h"
27#include "xfs_mount.h" 26#include "xfs_mount.h"
28#include "xfs_trans_priv.h" 27#include "xfs_trans_priv.h"
29#include "xfs_extfree_item.h" 28#include "xfs_extfree_item.h"
@@ -49,9 +48,8 @@ xfs_trans_get_efi(xfs_trans_t *tp,
49 /* 48 /*
50 * Get a log_item_desc to point at the new item. 49 * Get a log_item_desc to point at the new item.
51 */ 50 */
52 (void) xfs_trans_add_item(tp, (xfs_log_item_t*)efip); 51 xfs_trans_add_item(tp, &efip->efi_item);
53 52 return efip;
54 return (efip);
55} 53}
56 54
57/* 55/*
@@ -65,15 +63,11 @@ xfs_trans_log_efi_extent(xfs_trans_t *tp,
65 xfs_fsblock_t start_block, 63 xfs_fsblock_t start_block,
66 xfs_extlen_t ext_len) 64 xfs_extlen_t ext_len)
67{ 65{
68 xfs_log_item_desc_t *lidp;
69 uint next_extent; 66 uint next_extent;
70 xfs_extent_t *extp; 67 xfs_extent_t *extp;
71 68
72 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)efip);
73 ASSERT(lidp != NULL);
74
75 tp->t_flags |= XFS_TRANS_DIRTY; 69 tp->t_flags |= XFS_TRANS_DIRTY;
76 lidp->lid_flags |= XFS_LID_DIRTY; 70 efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY;
77 71
78 next_extent = efip->efi_next_extent; 72 next_extent = efip->efi_next_extent;
79 ASSERT(next_extent < efip->efi_format.efi_nextents); 73 ASSERT(next_extent < efip->efi_format.efi_nextents);
@@ -106,9 +100,8 @@ xfs_trans_get_efd(xfs_trans_t *tp,
106 /* 100 /*
107 * Get a log_item_desc to point at the new item. 101 * Get a log_item_desc to point at the new item.
108 */ 102 */
109 (void) xfs_trans_add_item(tp, (xfs_log_item_t*)efdp); 103 xfs_trans_add_item(tp, &efdp->efd_item);
110 104 return efdp;
111 return (efdp);
112} 105}
113 106
114/* 107/*
@@ -122,15 +115,11 @@ xfs_trans_log_efd_extent(xfs_trans_t *tp,
122 xfs_fsblock_t start_block, 115 xfs_fsblock_t start_block,
123 xfs_extlen_t ext_len) 116 xfs_extlen_t ext_len)
124{ 117{
125 xfs_log_item_desc_t *lidp;
126 uint next_extent; 118 uint next_extent;
127 xfs_extent_t *extp; 119 xfs_extent_t *extp;
128 120
129 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)efdp);
130 ASSERT(lidp != NULL);
131
132 tp->t_flags |= XFS_TRANS_DIRTY; 121 tp->t_flags |= XFS_TRANS_DIRTY;
133 lidp->lid_flags |= XFS_LID_DIRTY; 122 efdp->efd_item.li_desc->lid_flags |= XFS_LID_DIRTY;
134 123
135 next_extent = efdp->efd_next_extent; 124 next_extent = efdp->efd_next_extent;
136 ASSERT(next_extent < efdp->efd_format.efd_nextents); 125 ASSERT(next_extent < efdp->efd_format.efd_nextents);
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 2559dfec946b..cdc53a1050c5 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -24,20 +24,16 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_btree.h" 33#include "xfs_btree.h"
38#include "xfs_ialloc.h"
39#include "xfs_trans_priv.h" 34#include "xfs_trans_priv.h"
40#include "xfs_inode_item.h" 35#include "xfs_inode_item.h"
36#include "xfs_trace.h"
41 37
42#ifdef XFS_TRANS_DEBUG 38#ifdef XFS_TRANS_DEBUG
43STATIC void 39STATIC void
@@ -47,7 +43,6 @@ xfs_trans_inode_broot_debug(
47#define xfs_trans_inode_broot_debug(ip) 43#define xfs_trans_inode_broot_debug(ip)
48#endif 44#endif
49 45
50
51/* 46/*
52 * Get an inode and join it to the transaction. 47 * Get an inode and join it to the transaction.
53 */ 48 */
@@ -63,77 +58,65 @@ xfs_trans_iget(
63 int error; 58 int error;
64 59
65 error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp); 60 error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp);
66 if (!error && tp) 61 if (!error && tp) {
67 xfs_trans_ijoin(tp, *ipp, lock_flags); 62 xfs_trans_ijoin(tp, *ipp);
63 (*ipp)->i_itemp->ili_lock_flags = lock_flags;
64 }
68 return error; 65 return error;
69} 66}
70 67
71/* 68/*
72 * Add the locked inode to the transaction. 69 * Add a locked inode to the transaction.
73 * The inode must be locked, and it cannot be associated with any 70 *
74 * transaction. The caller must specify the locks already held 71 * The inode must be locked, and it cannot be associated with any transaction.
75 * on the inode.
76 */ 72 */
77void 73void
78xfs_trans_ijoin( 74xfs_trans_ijoin(
79 xfs_trans_t *tp, 75 struct xfs_trans *tp,
80 xfs_inode_t *ip, 76 struct xfs_inode *ip)
81 uint lock_flags)
82{ 77{
83 xfs_inode_log_item_t *iip; 78 xfs_inode_log_item_t *iip;
84 79
85 ASSERT(ip->i_transp == NULL); 80 ASSERT(ip->i_transp == NULL);
86 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 81 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
87 ASSERT(lock_flags & XFS_ILOCK_EXCL);
88 if (ip->i_itemp == NULL) 82 if (ip->i_itemp == NULL)
89 xfs_inode_item_init(ip, ip->i_mount); 83 xfs_inode_item_init(ip, ip->i_mount);
90 iip = ip->i_itemp; 84 iip = ip->i_itemp;
91 ASSERT(iip->ili_flags == 0); 85 ASSERT(iip->ili_lock_flags == 0);
92 86
93 /* 87 /*
94 * Get a log_item_desc to point at the new item. 88 * Get a log_item_desc to point at the new item.
95 */ 89 */
96 (void) xfs_trans_add_item(tp, (xfs_log_item_t*)(iip)); 90 xfs_trans_add_item(tp, &iip->ili_item);
97 91
98 xfs_trans_inode_broot_debug(ip); 92 xfs_trans_inode_broot_debug(ip);
99 93
100 /* 94 /*
101 * If the IO lock is already held, mark that in the inode log item.
102 */
103 if (lock_flags & XFS_IOLOCK_EXCL) {
104 iip->ili_flags |= XFS_ILI_IOLOCKED_EXCL;
105 } else if (lock_flags & XFS_IOLOCK_SHARED) {
106 iip->ili_flags |= XFS_ILI_IOLOCKED_SHARED;
107 }
108
109 /*
110 * Initialize i_transp so we can find it with xfs_inode_incore() 95 * Initialize i_transp so we can find it with xfs_inode_incore()
111 * in xfs_trans_iget() above. 96 * in xfs_trans_iget() above.
112 */ 97 */
113 ip->i_transp = tp; 98 ip->i_transp = tp;
114} 99}
115 100
116
117
118/* 101/*
119 * Mark the inode as not needing to be unlocked when the inode item's 102 * Add a locked inode to the transaction.
120 * IOP_UNLOCK() routine is called. The inode must already be locked 103 *
121 * and associated with the given transaction. 104 *
105 * Grabs a reference to the inode which will be dropped when the transaction
106 * is committed. The inode will also be unlocked at that point. The inode
107 * must be locked, and it cannot be associated with any transaction.
122 */ 108 */
123/*ARGSUSED*/
124void 109void
125xfs_trans_ihold( 110xfs_trans_ijoin_ref(
126 xfs_trans_t *tp, 111 struct xfs_trans *tp,
127 xfs_inode_t *ip) 112 struct xfs_inode *ip,
113 uint lock_flags)
128{ 114{
129 ASSERT(ip->i_transp == tp); 115 xfs_trans_ijoin(tp, ip);
130 ASSERT(ip->i_itemp != NULL); 116 IHOLD(ip);
131 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 117 ip->i_itemp->ili_lock_flags = lock_flags;
132
133 ip->i_itemp->ili_flags |= XFS_ILI_HOLD;
134} 118}
135 119
136
137/* 120/*
138 * This is called to mark the fields indicated in fieldmask as needing 121 * This is called to mark the fields indicated in fieldmask as needing
139 * to be logged when the transaction is committed. The inode must 122 * to be logged when the transaction is committed. The inode must
@@ -149,17 +132,12 @@ xfs_trans_log_inode(
149 xfs_inode_t *ip, 132 xfs_inode_t *ip,
150 uint flags) 133 uint flags)
151{ 134{
152 xfs_log_item_desc_t *lidp;
153
154 ASSERT(ip->i_transp == tp); 135 ASSERT(ip->i_transp == tp);
155 ASSERT(ip->i_itemp != NULL); 136 ASSERT(ip->i_itemp != NULL);
156 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 137 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
157 138
158 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(ip->i_itemp));
159 ASSERT(lidp != NULL);
160
161 tp->t_flags |= XFS_TRANS_DIRTY; 139 tp->t_flags |= XFS_TRANS_DIRTY;
162 lidp->lid_flags |= XFS_LID_DIRTY; 140 ip->i_itemp->ili_item.li_desc->lid_flags |= XFS_LID_DIRTY;
163 141
164 /* 142 /*
165 * Always OR in the bits from the ili_last_fields field. 143 * Always OR in the bits from the ili_last_fields field.
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c
deleted file mode 100644
index f11d37d06dcc..000000000000
--- a/fs/xfs/xfs_trans_item.c
+++ /dev/null
@@ -1,441 +0,0 @@
1/*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_log.h"
22#include "xfs_inum.h"
23#include "xfs_trans.h"
24#include "xfs_trans_priv.h"
25/* XXX: from here down needed until struct xfs_trans has its own ailp */
26#include "xfs_bit.h"
27#include "xfs_buf_item.h"
28#include "xfs_sb.h"
29#include "xfs_ag.h"
30#include "xfs_dir2.h"
31#include "xfs_dmapi.h"
32#include "xfs_mount.h"
33
34STATIC int xfs_trans_unlock_chunk(xfs_log_item_chunk_t *,
35 int, int, xfs_lsn_t);
36
37/*
38 * This is called to add the given log item to the transaction's
39 * list of log items. It must find a free log item descriptor
40 * or allocate a new one and add the item to that descriptor.
41 * The function returns a pointer to item descriptor used to point
42 * to the new item. The log item will now point to its new descriptor
43 * with its li_desc field.
44 */
45xfs_log_item_desc_t *
46xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
47{
48 xfs_log_item_desc_t *lidp;
49 xfs_log_item_chunk_t *licp;
50 int i=0;
51
52 /*
53 * If there are no free descriptors, allocate a new chunk
54 * of them and put it at the front of the chunk list.
55 */
56 if (tp->t_items_free == 0) {
57 licp = (xfs_log_item_chunk_t*)
58 kmem_alloc(sizeof(xfs_log_item_chunk_t), KM_SLEEP);
59 ASSERT(licp != NULL);
60 /*
61 * Initialize the chunk, and then
62 * claim the first slot in the newly allocated chunk.
63 */
64 xfs_lic_init(licp);
65 xfs_lic_claim(licp, 0);
66 licp->lic_unused = 1;
67 xfs_lic_init_slot(licp, 0);
68 lidp = xfs_lic_slot(licp, 0);
69
70 /*
71 * Link in the new chunk and update the free count.
72 */
73 licp->lic_next = tp->t_items.lic_next;
74 tp->t_items.lic_next = licp;
75 tp->t_items_free = XFS_LIC_NUM_SLOTS - 1;
76
77 /*
78 * Initialize the descriptor and the generic portion
79 * of the log item.
80 *
81 * Point the new slot at this item and return it.
82 * Also point the log item at its currently active
83 * descriptor and set the item's mount pointer.
84 */
85 lidp->lid_item = lip;
86 lidp->lid_flags = 0;
87 lidp->lid_size = 0;
88 lip->li_desc = lidp;
89 lip->li_mountp = tp->t_mountp;
90 lip->li_ailp = tp->t_mountp->m_ail;
91 return lidp;
92 }
93
94 /*
95 * Find the free descriptor. It is somewhere in the chunklist
96 * of descriptors.
97 */
98 licp = &tp->t_items;
99 while (licp != NULL) {
100 if (xfs_lic_vacancy(licp)) {
101 if (licp->lic_unused <= XFS_LIC_MAX_SLOT) {
102 i = licp->lic_unused;
103 ASSERT(xfs_lic_isfree(licp, i));
104 break;
105 }
106 for (i = 0; i <= XFS_LIC_MAX_SLOT; i++) {
107 if (xfs_lic_isfree(licp, i))
108 break;
109 }
110 ASSERT(i <= XFS_LIC_MAX_SLOT);
111 break;
112 }
113 licp = licp->lic_next;
114 }
115 ASSERT(licp != NULL);
116 /*
117 * If we find a free descriptor, claim it,
118 * initialize it, and return it.
119 */
120 xfs_lic_claim(licp, i);
121 if (licp->lic_unused <= i) {
122 licp->lic_unused = i + 1;
123 xfs_lic_init_slot(licp, i);
124 }
125 lidp = xfs_lic_slot(licp, i);
126 tp->t_items_free--;
127 lidp->lid_item = lip;
128 lidp->lid_flags = 0;
129 lidp->lid_size = 0;
130 lip->li_desc = lidp;
131 lip->li_mountp = tp->t_mountp;
132 lip->li_ailp = tp->t_mountp->m_ail;
133 return lidp;
134}
135
136/*
137 * Free the given descriptor.
138 *
139 * This requires setting the bit in the chunk's free mask corresponding
140 * to the given slot.
141 */
142void
143xfs_trans_free_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
144{
145 uint slot;
146 xfs_log_item_chunk_t *licp;
147 xfs_log_item_chunk_t **licpp;
148
149 slot = xfs_lic_desc_to_slot(lidp);
150 licp = xfs_lic_desc_to_chunk(lidp);
151 xfs_lic_relse(licp, slot);
152 lidp->lid_item->li_desc = NULL;
153 tp->t_items_free++;
154
155 /*
156 * If there are no more used items in the chunk and this is not
157 * the chunk embedded in the transaction structure, then free
158 * the chunk. First pull it from the chunk list and then
159 * free it back to the heap. We didn't bother with a doubly
160 * linked list here because the lists should be very short
161 * and this is not a performance path. It's better to save
162 * the memory of the extra pointer.
163 *
164 * Also decrement the transaction structure's count of free items
165 * by the number in a chunk since we are freeing an empty chunk.
166 */
167 if (xfs_lic_are_all_free(licp) && (licp != &(tp->t_items))) {
168 licpp = &(tp->t_items.lic_next);
169 while (*licpp != licp) {
170 ASSERT(*licpp != NULL);
171 licpp = &((*licpp)->lic_next);
172 }
173 *licpp = licp->lic_next;
174 kmem_free(licp);
175 tp->t_items_free -= XFS_LIC_NUM_SLOTS;
176 }
177}
178
179/*
180 * This is called to find the descriptor corresponding to the given
181 * log item. It returns a pointer to the descriptor.
182 * The log item MUST have a corresponding descriptor in the given
183 * transaction. This routine does not return NULL, it panics.
184 *
185 * The descriptor pointer is kept in the log item's li_desc field.
186 * Just return it.
187 */
188/*ARGSUSED*/
189xfs_log_item_desc_t *
190xfs_trans_find_item(xfs_trans_t *tp, xfs_log_item_t *lip)
191{
192 ASSERT(lip->li_desc != NULL);
193
194 return lip->li_desc;
195}
196
197
198/*
199 * Return a pointer to the first descriptor in the chunk list.
200 * This does not return NULL if there are none, it panics.
201 *
202 * The first descriptor must be in either the first or second chunk.
203 * This is because the only chunk allowed to be empty is the first.
204 * All others are freed when they become empty.
205 *
206 * At some point this and xfs_trans_next_item() should be optimized
207 * to quickly look at the mask to determine if there is anything to
208 * look at.
209 */
210xfs_log_item_desc_t *
211xfs_trans_first_item(xfs_trans_t *tp)
212{
213 xfs_log_item_chunk_t *licp;
214 int i;
215
216 licp = &tp->t_items;
217 /*
218 * If it's not in the first chunk, skip to the second.
219 */
220 if (xfs_lic_are_all_free(licp)) {
221 licp = licp->lic_next;
222 }
223
224 /*
225 * Return the first non-free descriptor in the chunk.
226 */
227 ASSERT(!xfs_lic_are_all_free(licp));
228 for (i = 0; i < licp->lic_unused; i++) {
229 if (xfs_lic_isfree(licp, i)) {
230 continue;
231 }
232
233 return xfs_lic_slot(licp, i);
234 }
235 cmn_err(CE_WARN, "xfs_trans_first_item() -- no first item");
236 return NULL;
237}
238
239
240/*
241 * Given a descriptor, return the next descriptor in the chunk list.
242 * This returns NULL if there are no more used descriptors in the list.
243 *
244 * We do this by first locating the chunk in which the descriptor resides,
245 * and then scanning forward in the chunk and the list for the next
246 * used descriptor.
247 */
248/*ARGSUSED*/
249xfs_log_item_desc_t *
250xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
251{
252 xfs_log_item_chunk_t *licp;
253 int i;
254
255 licp = xfs_lic_desc_to_chunk(lidp);
256
257 /*
258 * First search the rest of the chunk. The for loop keeps us
259 * from referencing things beyond the end of the chunk.
260 */
261 for (i = (int)xfs_lic_desc_to_slot(lidp) + 1; i < licp->lic_unused; i++) {
262 if (xfs_lic_isfree(licp, i)) {
263 continue;
264 }
265
266 return xfs_lic_slot(licp, i);
267 }
268
269 /*
270 * Now search the next chunk. It must be there, because the
271 * next chunk would have been freed if it were empty.
272 * If there is no next chunk, return NULL.
273 */
274 if (licp->lic_next == NULL) {
275 return NULL;
276 }
277
278 licp = licp->lic_next;
279 ASSERT(!xfs_lic_are_all_free(licp));
280 for (i = 0; i < licp->lic_unused; i++) {
281 if (xfs_lic_isfree(licp, i)) {
282 continue;
283 }
284
285 return xfs_lic_slot(licp, i);
286 }
287 ASSERT(0);
288 /* NOTREACHED */
289 return NULL; /* keep gcc quiet */
290}
291
292/*
293 * This is called to unlock all of the items of a transaction and to free
294 * all the descriptors of that transaction.
295 *
296 * It walks the list of descriptors and unlocks each item. It frees
297 * each chunk except that embedded in the transaction as it goes along.
298 */
299void
300xfs_trans_free_items(
301 xfs_trans_t *tp,
302 xfs_lsn_t commit_lsn,
303 int flags)
304{
305 xfs_log_item_chunk_t *licp;
306 xfs_log_item_chunk_t *next_licp;
307 int abort;
308
309 abort = flags & XFS_TRANS_ABORT;
310 licp = &tp->t_items;
311 /*
312 * Special case the embedded chunk so we don't free it below.
313 */
314 if (!xfs_lic_are_all_free(licp)) {
315 (void) xfs_trans_unlock_chunk(licp, 1, abort, commit_lsn);
316 xfs_lic_all_free(licp);
317 licp->lic_unused = 0;
318 }
319 licp = licp->lic_next;
320
321 /*
322 * Unlock each item in each chunk and free the chunks.
323 */
324 while (licp != NULL) {
325 ASSERT(!xfs_lic_are_all_free(licp));
326 (void) xfs_trans_unlock_chunk(licp, 1, abort, commit_lsn);
327 next_licp = licp->lic_next;
328 kmem_free(licp);
329 licp = next_licp;
330 }
331
332 /*
333 * Reset the transaction structure's free item count.
334 */
335 tp->t_items_free = XFS_LIC_NUM_SLOTS;
336 tp->t_items.lic_next = NULL;
337}
338
339
340
341/*
342 * This is called to unlock the items associated with a transaction.
343 * Items which were not logged should be freed.
344 * Those which were logged must still be tracked so they can be unpinned
345 * when the transaction commits.
346 */
347void
348xfs_trans_unlock_items(xfs_trans_t *tp, xfs_lsn_t commit_lsn)
349{
350 xfs_log_item_chunk_t *licp;
351 xfs_log_item_chunk_t *next_licp;
352 xfs_log_item_chunk_t **licpp;
353 int freed;
354
355 freed = 0;
356 licp = &tp->t_items;
357
358 /*
359 * Special case the embedded chunk so we don't free.
360 */
361 if (!xfs_lic_are_all_free(licp)) {
362 freed = xfs_trans_unlock_chunk(licp, 0, 0, commit_lsn);
363 }
364 licpp = &(tp->t_items.lic_next);
365 licp = licp->lic_next;
366
367 /*
368 * Unlock each item in each chunk, free non-dirty descriptors,
369 * and free empty chunks.
370 */
371 while (licp != NULL) {
372 ASSERT(!xfs_lic_are_all_free(licp));
373 freed += xfs_trans_unlock_chunk(licp, 0, 0, commit_lsn);
374 next_licp = licp->lic_next;
375 if (xfs_lic_are_all_free(licp)) {
376 *licpp = next_licp;
377 kmem_free(licp);
378 freed -= XFS_LIC_NUM_SLOTS;
379 } else {
380 licpp = &(licp->lic_next);
381 }
382 ASSERT(*licpp == next_licp);
383 licp = next_licp;
384 }
385
386 /*
387 * Fix the free descriptor count in the transaction.
388 */
389 tp->t_items_free += freed;
390}
391
392/*
393 * Unlock each item pointed to by a descriptor in the given chunk.
394 * Stamp the commit lsn into each item if necessary.
395 * Free descriptors pointing to items which are not dirty if freeing_chunk
396 * is zero. If freeing_chunk is non-zero, then we need to unlock all
397 * items in the chunk.
398 *
399 * Return the number of descriptors freed.
400 */
401STATIC int
402xfs_trans_unlock_chunk(
403 xfs_log_item_chunk_t *licp,
404 int freeing_chunk,
405 int abort,
406 xfs_lsn_t commit_lsn)
407{
408 xfs_log_item_desc_t *lidp;
409 xfs_log_item_t *lip;
410 int i;
411 int freed;
412
413 freed = 0;
414 lidp = licp->lic_descs;
415 for (i = 0; i < licp->lic_unused; i++, lidp++) {
416 if (xfs_lic_isfree(licp, i)) {
417 continue;
418 }
419 lip = lidp->lid_item;
420 lip->li_desc = NULL;
421
422 if (commit_lsn != NULLCOMMITLSN)
423 IOP_COMMITTING(lip, commit_lsn);
424 if (abort)
425 lip->li_flags |= XFS_LI_ABORTED;
426 IOP_UNLOCK(lip);
427
428 /*
429 * Free the descriptor if the item is not dirty
430 * within this transaction and the caller is not
431 * going to just free the entire thing regardless.
432 */
433 if (!(freeing_chunk) &&
434 (!(lidp->lid_flags & XFS_LID_DIRTY) || abort)) {
435 xfs_lic_relse(licp, i);
436 freed++;
437 }
438 }
439
440 return freed;
441}
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index c6e4f2c8de6e..e2d93d8ead7b 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -23,22 +23,8 @@ struct xfs_log_item_desc;
23struct xfs_mount; 23struct xfs_mount;
24struct xfs_trans; 24struct xfs_trans;
25 25
26/* 26void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
27 * From xfs_trans_item.c 27void xfs_trans_del_item(struct xfs_log_item *);
28 */
29struct xfs_log_item_desc *xfs_trans_add_item(struct xfs_trans *,
30 struct xfs_log_item *);
31void xfs_trans_free_item(struct xfs_trans *,
32 struct xfs_log_item_desc *);
33struct xfs_log_item_desc *xfs_trans_find_item(struct xfs_trans *,
34 struct xfs_log_item *);
35struct xfs_log_item_desc *xfs_trans_first_item(struct xfs_trans *);
36struct xfs_log_item_desc *xfs_trans_next_item(struct xfs_trans *,
37 struct xfs_log_item_desc *);
38
39void xfs_trans_unlock_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn);
40void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
41 int flags);
42 28
43void xfs_trans_item_committed(struct xfs_log_item *lip, 29void xfs_trans_item_committed(struct xfs_log_item *lip,
44 xfs_lsn_t commit_lsn, int aborted); 30 xfs_lsn_t commit_lsn, int aborted);
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 4d88616bde91..b7d5769d2df0 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -25,18 +25,14 @@
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h" 27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 28#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_dir2_sf.h"
32#include "xfs_attr_sf.h"
33#include "xfs_dinode.h" 30#include "xfs_dinode.h"
34#include "xfs_inode.h" 31#include "xfs_inode.h"
35#include "xfs_inode_item.h" 32#include "xfs_inode_item.h"
36#include "xfs_bmap.h" 33#include "xfs_bmap.h"
37#include "xfs_error.h" 34#include "xfs_error.h"
38#include "xfs_quota.h" 35#include "xfs_quota.h"
39#include "xfs_rw.h"
40#include "xfs_itable.h" 36#include "xfs_itable.h"
41#include "xfs_utils.h" 37#include "xfs_utils.h"
42 38
@@ -324,86 +320,3 @@ xfs_bumplink(
324 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 320 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
325 return 0; 321 return 0;
326} 322}
327
328/*
329 * Try to truncate the given file to 0 length. Currently called
330 * only out of xfs_remove when it has to truncate a file to free
331 * up space for the remove to proceed.
332 */
333int
334xfs_truncate_file(
335 xfs_mount_t *mp,
336 xfs_inode_t *ip)
337{
338 xfs_trans_t *tp;
339 int error;
340
341#ifdef QUOTADEBUG
342 /*
343 * This is called to truncate the quotainodes too.
344 */
345 if (XFS_IS_UQUOTA_ON(mp)) {
346 if (ip->i_ino != mp->m_sb.sb_uquotino)
347 ASSERT(ip->i_udquot);
348 }
349 if (XFS_IS_OQUOTA_ON(mp)) {
350 if (ip->i_ino != mp->m_sb.sb_gquotino)
351 ASSERT(ip->i_gdquot);
352 }
353#endif
354 /*
355 * Make the call to xfs_itruncate_start before starting the
356 * transaction, because we cannot make the call while we're
357 * in a transaction.
358 */
359 xfs_ilock(ip, XFS_IOLOCK_EXCL);
360 error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, (xfs_fsize_t)0);
361 if (error) {
362 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
363 return error;
364 }
365
366 tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE);
367 if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
368 XFS_TRANS_PERM_LOG_RES,
369 XFS_ITRUNCATE_LOG_COUNT))) {
370 xfs_trans_cancel(tp, 0);
371 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
372 return error;
373 }
374
375 /*
376 * Follow the normal truncate locking protocol. Since we
377 * hold the inode in the transaction, we know that its number
378 * of references will stay constant.
379 */
380 xfs_ilock(ip, XFS_ILOCK_EXCL);
381 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
382 xfs_trans_ihold(tp, ip);
383 /*
384 * Signal a sync xaction. The only case where that isn't
385 * the case is if we're truncating an already unlinked file
386 * on a wsync fs. In that case, we know the blocks can't
387 * reappear in the file because the links to file are
388 * permanently toast. Currently, we're always going to
389 * want a sync transaction because this code is being
390 * called from places where nlink is guaranteed to be 1
391 * but I'm leaving the tests in to protect against future
392 * changes -- rcc.
393 */
394 error = xfs_itruncate_finish(&tp, ip, (xfs_fsize_t)0,
395 XFS_DATA_FORK,
396 ((ip->i_d.di_nlink != 0 ||
397 !(mp->m_flags & XFS_MOUNT_WSYNC))
398 ? 1 : 0));
399 if (error) {
400 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
401 XFS_TRANS_ABORT);
402 } else {
403 xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
404 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
405 }
406 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
407
408 return error;
409}
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h
index ef321225d269..f55b9678264f 100644
--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -18,7 +18,6 @@
18#ifndef __XFS_UTILS_H__ 18#ifndef __XFS_UTILS_H__
19#define __XFS_UTILS_H__ 19#define __XFS_UTILS_H__
20 20
21extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *);
22extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, 21extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t,
23 xfs_dev_t, cred_t *, prid_t, int, 22 xfs_dev_t, cred_t *, prid_t, int,
24 xfs_inode_t **, int *); 23 xfs_inode_t **, int *);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index c1646838898f..66d585c6917c 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -26,19 +26,14 @@
26#include "xfs_sb.h" 26#include "xfs_sb.h"
27#include "xfs_ag.h" 27#include "xfs_ag.h"
28#include "xfs_dir2.h" 28#include "xfs_dir2.h"
29#include "xfs_dmapi.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
31#include "xfs_da_btree.h" 30#include "xfs_da_btree.h"
32#include "xfs_bmap_btree.h" 31#include "xfs_bmap_btree.h"
33#include "xfs_alloc_btree.h"
34#include "xfs_ialloc_btree.h" 32#include "xfs_ialloc_btree.h"
35#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 33#include "xfs_dinode.h"
38#include "xfs_inode.h" 34#include "xfs_inode.h"
39#include "xfs_inode_item.h" 35#include "xfs_inode_item.h"
40#include "xfs_itable.h" 36#include "xfs_itable.h"
41#include "xfs_btree.h"
42#include "xfs_ialloc.h" 37#include "xfs_ialloc.h"
43#include "xfs_alloc.h" 38#include "xfs_alloc.h"
44#include "xfs_bmap.h" 39#include "xfs_bmap.h"
@@ -73,7 +68,7 @@ xfs_setattr(
73 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; 68 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2;
74 int need_iolock = 1; 69 int need_iolock = 1;
75 70
76 xfs_itrace_entry(ip); 71 trace_xfs_setattr(ip);
77 72
78 if (mp->m_flags & XFS_MOUNT_RDONLY) 73 if (mp->m_flags & XFS_MOUNT_RDONLY)
79 return XFS_ERROR(EROFS); 74 return XFS_ERROR(EROFS);
@@ -143,16 +138,6 @@ xfs_setattr(
143 goto error_return; 138 goto error_return;
144 } 139 }
145 } else { 140 } else {
146 if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) &&
147 !(flags & XFS_ATTR_DMI)) {
148 int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR;
149 code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, ip,
150 iattr->ia_size, 0, dmflags, NULL);
151 if (code) {
152 lock_flags = 0;
153 goto error_return;
154 }
155 }
156 if (need_iolock) 141 if (need_iolock)
157 lock_flags |= XFS_IOLOCK_EXCL; 142 lock_flags |= XFS_IOLOCK_EXCL;
158 } 143 }
@@ -236,8 +221,11 @@ xfs_setattr(
236 * transaction to modify the i_size. 221 * transaction to modify the i_size.
237 */ 222 */
238 code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size); 223 code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
224 if (code)
225 goto error_return;
239 } 226 }
240 xfs_iunlock(ip, XFS_ILOCK_EXCL); 227 xfs_iunlock(ip, XFS_ILOCK_EXCL);
228 lock_flags &= ~XFS_ILOCK_EXCL;
241 229
242 /* 230 /*
243 * We are going to log the inode size change in this 231 * We are going to log the inode size change in this
@@ -251,40 +239,38 @@ xfs_setattr(
251 * really care about here and prevents waiting for other data 239 * really care about here and prevents waiting for other data
252 * not within the range we care about here. 240 * not within the range we care about here.
253 */ 241 */
254 if (!code && 242 if (ip->i_size != ip->i_d.di_size &&
255 ip->i_size != ip->i_d.di_size &&
256 iattr->ia_size > ip->i_d.di_size) { 243 iattr->ia_size > ip->i_d.di_size) {
257 code = xfs_flush_pages(ip, 244 code = xfs_flush_pages(ip,
258 ip->i_d.di_size, iattr->ia_size, 245 ip->i_d.di_size, iattr->ia_size,
259 XBF_ASYNC, FI_NONE); 246 XBF_ASYNC, FI_NONE);
247 if (code)
248 goto error_return;
260 } 249 }
261 250
262 /* wait for all I/O to complete */ 251 /* wait for all I/O to complete */
263 xfs_ioend_wait(ip); 252 xfs_ioend_wait(ip);
264 253
265 if (!code) 254 code = -block_truncate_page(inode->i_mapping, iattr->ia_size,
266 code = xfs_itruncate_data(ip, iattr->ia_size); 255 xfs_get_blocks);
267 if (code) { 256 if (code)
268 ASSERT(tp == NULL);
269 lock_flags &= ~XFS_ILOCK_EXCL;
270 ASSERT(lock_flags == XFS_IOLOCK_EXCL || !need_iolock);
271 goto error_return; 257 goto error_return;
272 } 258
273 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); 259 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
274 if ((code = xfs_trans_reserve(tp, 0, 260 code = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
275 XFS_ITRUNCATE_LOG_RES(mp), 0, 261 XFS_TRANS_PERM_LOG_RES,
276 XFS_TRANS_PERM_LOG_RES, 262 XFS_ITRUNCATE_LOG_COUNT);
277 XFS_ITRUNCATE_LOG_COUNT))) { 263 if (code)
278 xfs_trans_cancel(tp, 0); 264 goto error_return;
279 if (need_iolock) 265
280 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 266 truncate_setsize(inode, iattr->ia_size);
281 return code; 267
282 }
283 commit_flags = XFS_TRANS_RELEASE_LOG_RES; 268 commit_flags = XFS_TRANS_RELEASE_LOG_RES;
269 lock_flags |= XFS_ILOCK_EXCL;
270
284 xfs_ilock(ip, XFS_ILOCK_EXCL); 271 xfs_ilock(ip, XFS_ILOCK_EXCL);
285 272
286 xfs_trans_ijoin(tp, ip, lock_flags); 273 xfs_trans_ijoin(tp, ip);
287 xfs_trans_ihold(tp, ip);
288 274
289 /* 275 /*
290 * Only change the c/mtime if we are changing the size 276 * Only change the c/mtime if we are changing the size
@@ -334,8 +320,7 @@ xfs_setattr(
334 xfs_iflags_set(ip, XFS_ITRUNCATED); 320 xfs_iflags_set(ip, XFS_ITRUNCATED);
335 } 321 }
336 } else if (tp) { 322 } else if (tp) {
337 xfs_trans_ijoin(tp, ip, lock_flags); 323 xfs_trans_ijoin(tp, ip);
338 xfs_trans_ihold(tp, ip);
339 } 324 }
340 325
341 /* 326 /*
@@ -470,17 +455,10 @@ xfs_setattr(
470 return XFS_ERROR(code); 455 return XFS_ERROR(code);
471 } 456 }
472 457
473 if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) &&
474 !(flags & XFS_ATTR_DMI)) {
475 (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL,
476 NULL, DM_RIGHT_NULL, NULL, NULL,
477 0, 0, AT_DELAY_FLAG(flags));
478 }
479 return 0; 458 return 0;
480 459
481 abort_return: 460 abort_return:
482 commit_flags |= XFS_TRANS_ABORT; 461 commit_flags |= XFS_TRANS_ABORT;
483 /* FALLTHROUGH */
484 error_return: 462 error_return:
485 xfs_qm_dqrele(udqp); 463 xfs_qm_dqrele(udqp);
486 xfs_qm_dqrele(gdqp); 464 xfs_qm_dqrele(gdqp);
@@ -516,7 +494,7 @@ xfs_readlink_bmap(
516 int error = 0; 494 int error = 0;
517 495
518 error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen), 0, NULL, 0, 496 error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen), 0, NULL, 0,
519 mval, &nmaps, NULL, NULL); 497 mval, &nmaps, NULL);
520 if (error) 498 if (error)
521 goto out; 499 goto out;
522 500
@@ -557,7 +535,7 @@ xfs_readlink(
557 int pathlen; 535 int pathlen;
558 int error = 0; 536 int error = 0;
559 537
560 xfs_itrace_entry(ip); 538 trace_xfs_readlink(ip);
561 539
562 if (XFS_FORCED_SHUTDOWN(mp)) 540 if (XFS_FORCED_SHUTDOWN(mp))
563 return XFS_ERROR(EIO); 541 return XFS_ERROR(EIO);
@@ -613,14 +591,14 @@ xfs_free_eofblocks(
613 */ 591 */
614 end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_size)); 592 end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_size));
615 last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); 593 last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
616 map_len = last_fsb - end_fsb; 594 if (last_fsb <= end_fsb)
617 if (map_len <= 0)
618 return 0; 595 return 0;
596 map_len = last_fsb - end_fsb;
619 597
620 nimaps = 1; 598 nimaps = 1;
621 xfs_ilock(ip, XFS_ILOCK_SHARED); 599 xfs_ilock(ip, XFS_ILOCK_SHARED);
622 error = xfs_bmapi(NULL, ip, end_fsb, map_len, 0, 600 error = xfs_bmapi(NULL, ip, end_fsb, map_len, 0,
623 NULL, 0, &imap, &nimaps, NULL, NULL); 601 NULL, 0, &imap, &nimaps, NULL);
624 xfs_iunlock(ip, XFS_ILOCK_SHARED); 602 xfs_iunlock(ip, XFS_ILOCK_SHARED);
625 603
626 if (!error && (nimaps != 0) && 604 if (!error && (nimaps != 0) &&
@@ -675,10 +653,7 @@ xfs_free_eofblocks(
675 } 653 }
676 654
677 xfs_ilock(ip, XFS_ILOCK_EXCL); 655 xfs_ilock(ip, XFS_ILOCK_EXCL);
678 xfs_trans_ijoin(tp, ip, 656 xfs_trans_ijoin(tp, ip);
679 XFS_IOLOCK_EXCL |
680 XFS_ILOCK_EXCL);
681 xfs_trans_ihold(tp, ip);
682 657
683 error = xfs_itruncate_finish(&tp, ip, 658 error = xfs_itruncate_finish(&tp, ip,
684 ip->i_size, 659 ip->i_size,
@@ -750,8 +725,7 @@ xfs_inactive_symlink_rmt(
750 xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 725 xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
751 size = (int)ip->i_d.di_size; 726 size = (int)ip->i_d.di_size;
752 ip->i_d.di_size = 0; 727 ip->i_d.di_size = 0;
753 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 728 xfs_trans_ijoin(tp, ip);
754 xfs_trans_ihold(tp, ip);
755 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 729 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
756 /* 730 /*
757 * Find the block(s) so we can inval and unmap them. 731 * Find the block(s) so we can inval and unmap them.
@@ -761,7 +735,7 @@ xfs_inactive_symlink_rmt(
761 nmaps = ARRAY_SIZE(mval); 735 nmaps = ARRAY_SIZE(mval);
762 if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size), 736 if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size),
763 XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps, 737 XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps,
764 &free_list, NULL))) 738 &free_list)))
765 goto error0; 739 goto error0;
766 /* 740 /*
767 * Invalidate the block(s). 741 * Invalidate the block(s).
@@ -776,7 +750,7 @@ xfs_inactive_symlink_rmt(
776 * Unmap the dead block(s) to the free_list. 750 * Unmap the dead block(s) to the free_list.
777 */ 751 */
778 if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps, 752 if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps,
779 &first_block, &free_list, NULL, &done))) 753 &first_block, &free_list, &done)))
780 goto error1; 754 goto error1;
781 ASSERT(done); 755 ASSERT(done);
782 /* 756 /*
@@ -795,8 +769,7 @@ xfs_inactive_symlink_rmt(
795 * Mark it dirty so it will be logged and moved forward in the log as 769 * Mark it dirty so it will be logged and moved forward in the log as
796 * part of every commit. 770 * part of every commit.
797 */ 771 */
798 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 772 xfs_trans_ijoin(tp, ip);
799 xfs_trans_ihold(tp, ip);
800 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 773 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
801 /* 774 /*
802 * Get a new, empty transaction to return to our caller. 775 * Get a new, empty transaction to return to our caller.
@@ -929,8 +902,7 @@ xfs_inactive_attrs(
929 goto error_cancel; 902 goto error_cancel;
930 903
931 xfs_ilock(ip, XFS_ILOCK_EXCL); 904 xfs_ilock(ip, XFS_ILOCK_EXCL);
932 xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 905 xfs_trans_ijoin(tp, ip);
933 xfs_trans_ihold(tp, ip);
934 xfs_idestroy_fork(ip, XFS_ATTR_FORK); 906 xfs_idestroy_fork(ip, XFS_ATTR_FORK);
935 907
936 ASSERT(ip->i_d.di_anextents == 0); 908 ASSERT(ip->i_d.di_anextents == 0);
@@ -1035,8 +1007,6 @@ xfs_inactive(
1035 int error; 1007 int error;
1036 int truncate; 1008 int truncate;
1037 1009
1038 xfs_itrace_entry(ip);
1039
1040 /* 1010 /*
1041 * If the inode is already free, then there can be nothing 1011 * If the inode is already free, then there can be nothing
1042 * to clean up here. 1012 * to clean up here.
@@ -1060,9 +1030,6 @@ xfs_inactive(
1060 1030
1061 mp = ip->i_mount; 1031 mp = ip->i_mount;
1062 1032
1063 if (ip->i_d.di_nlink == 0 && DM_EVENT_ENABLED(ip, DM_EVENT_DESTROY))
1064 XFS_SEND_DESTROY(mp, ip, DM_RIGHT_NULL);
1065
1066 error = 0; 1033 error = 0;
1067 1034
1068 /* If this is a read-only mount, don't do this (would generate I/O) */ 1035 /* If this is a read-only mount, don't do this (would generate I/O) */
@@ -1120,8 +1087,7 @@ xfs_inactive(
1120 } 1087 }
1121 1088
1122 xfs_ilock(ip, XFS_ILOCK_EXCL); 1089 xfs_ilock(ip, XFS_ILOCK_EXCL);
1123 xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1090 xfs_trans_ijoin(tp, ip);
1124 xfs_trans_ihold(tp, ip);
1125 1091
1126 /* 1092 /*
1127 * normally, we have to run xfs_itruncate_finish sync. 1093 * normally, we have to run xfs_itruncate_finish sync.
@@ -1154,8 +1120,7 @@ xfs_inactive(
1154 return VN_INACTIVE_CACHE; 1120 return VN_INACTIVE_CACHE;
1155 } 1121 }
1156 1122
1157 xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1123 xfs_trans_ijoin(tp, ip);
1158 xfs_trans_ihold(tp, ip);
1159 } else { 1124 } else {
1160 error = xfs_trans_reserve(tp, 0, 1125 error = xfs_trans_reserve(tp, 0,
1161 XFS_IFREE_LOG_RES(mp), 1126 XFS_IFREE_LOG_RES(mp),
@@ -1168,8 +1133,7 @@ xfs_inactive(
1168 } 1133 }
1169 1134
1170 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1135 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
1171 xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1136 xfs_trans_ijoin(tp, ip);
1172 xfs_trans_ihold(tp, ip);
1173 } 1137 }
1174 1138
1175 /* 1139 /*
@@ -1257,7 +1221,7 @@ xfs_lookup(
1257 int error; 1221 int error;
1258 uint lock_mode; 1222 uint lock_mode;
1259 1223
1260 xfs_itrace_entry(dp); 1224 trace_xfs_lookup(dp, name);
1261 1225
1262 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 1226 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
1263 return XFS_ERROR(EIO); 1227 return XFS_ERROR(EIO);
@@ -1309,21 +1273,11 @@ xfs_create(
1309 uint log_res; 1273 uint log_res;
1310 uint log_count; 1274 uint log_count;
1311 1275
1312 xfs_itrace_entry(dp); 1276 trace_xfs_create(dp, name);
1313 1277
1314 if (XFS_FORCED_SHUTDOWN(mp)) 1278 if (XFS_FORCED_SHUTDOWN(mp))
1315 return XFS_ERROR(EIO); 1279 return XFS_ERROR(EIO);
1316 1280
1317 if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) {
1318 error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
1319 dp, DM_RIGHT_NULL, NULL,
1320 DM_RIGHT_NULL, name->name, NULL,
1321 mode, 0, 0);
1322
1323 if (error)
1324 return error;
1325 }
1326
1327 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 1281 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
1328 prid = dp->i_d.di_projid; 1282 prid = dp->i_d.di_projid;
1329 else 1283 else
@@ -1427,8 +1381,7 @@ xfs_create(
1427 * the transaction cancel unlocking dp so don't do it explicitly in the 1381 * the transaction cancel unlocking dp so don't do it explicitly in the
1428 * error path. 1382 * error path.
1429 */ 1383 */
1430 IHOLD(dp); 1384 xfs_trans_ijoin_ref(tp, dp, XFS_ILOCK_EXCL);
1431 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
1432 unlock_dp_on_error = B_FALSE; 1385 unlock_dp_on_error = B_FALSE;
1433 1386
1434 error = xfs_dir_createname(tp, dp, name, ip->i_ino, 1387 error = xfs_dir_createname(tp, dp, name, ip->i_ino,
@@ -1487,16 +1440,7 @@ xfs_create(
1487 xfs_qm_dqrele(gdqp); 1440 xfs_qm_dqrele(gdqp);
1488 1441
1489 *ipp = ip; 1442 *ipp = ip;
1490 1443 return 0;
1491 /* Fallthrough to std_return with error = 0 */
1492 std_return:
1493 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) {
1494 XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, dp, DM_RIGHT_NULL,
1495 ip, DM_RIGHT_NULL, name->name, NULL, mode,
1496 error, 0);
1497 }
1498
1499 return error;
1500 1444
1501 out_bmap_cancel: 1445 out_bmap_cancel:
1502 xfs_bmap_cancel(&free_list); 1446 xfs_bmap_cancel(&free_list);
@@ -1510,8 +1454,8 @@ xfs_create(
1510 1454
1511 if (unlock_dp_on_error) 1455 if (unlock_dp_on_error)
1512 xfs_iunlock(dp, XFS_ILOCK_EXCL); 1456 xfs_iunlock(dp, XFS_ILOCK_EXCL);
1513 1457 std_return:
1514 goto std_return; 1458 return error;
1515 1459
1516 out_abort_rele: 1460 out_abort_rele:
1517 /* 1461 /*
@@ -1726,20 +1670,11 @@ xfs_remove(
1726 uint resblks; 1670 uint resblks;
1727 uint log_count; 1671 uint log_count;
1728 1672
1729 xfs_itrace_entry(dp); 1673 trace_xfs_remove(dp, name);
1730 xfs_itrace_entry(ip);
1731 1674
1732 if (XFS_FORCED_SHUTDOWN(mp)) 1675 if (XFS_FORCED_SHUTDOWN(mp))
1733 return XFS_ERROR(EIO); 1676 return XFS_ERROR(EIO);
1734 1677
1735 if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) {
1736 error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dp, DM_RIGHT_NULL,
1737 NULL, DM_RIGHT_NULL, name->name, NULL,
1738 ip->i_d.di_mode, 0, 0);
1739 if (error)
1740 return error;
1741 }
1742
1743 error = xfs_qm_dqattach(dp, 0); 1678 error = xfs_qm_dqattach(dp, 0);
1744 if (error) 1679 if (error)
1745 goto std_return; 1680 goto std_return;
@@ -1782,15 +1717,8 @@ xfs_remove(
1782 1717
1783 xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL); 1718 xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
1784 1719
1785 /* 1720 xfs_trans_ijoin_ref(tp, dp, XFS_ILOCK_EXCL);
1786 * At this point, we've gotten both the directory and the entry 1721 xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
1787 * inodes locked.
1788 */
1789 IHOLD(ip);
1790 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
1791
1792 IHOLD(dp);
1793 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1794 1722
1795 /* 1723 /*
1796 * If we're removing a directory perform some additional validation. 1724 * If we're removing a directory perform some additional validation.
@@ -1877,21 +1805,15 @@ xfs_remove(
1877 if (!is_dir && link_zero && xfs_inode_is_filestream(ip)) 1805 if (!is_dir && link_zero && xfs_inode_is_filestream(ip))
1878 xfs_filestream_deassociate(ip); 1806 xfs_filestream_deassociate(ip);
1879 1807
1880 std_return: 1808 return 0;
1881 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
1882 XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, dp, DM_RIGHT_NULL,
1883 NULL, DM_RIGHT_NULL, name->name, NULL,
1884 ip->i_d.di_mode, error, 0);
1885 }
1886
1887 return error;
1888 1809
1889 out_bmap_cancel: 1810 out_bmap_cancel:
1890 xfs_bmap_cancel(&free_list); 1811 xfs_bmap_cancel(&free_list);
1891 cancel_flags |= XFS_TRANS_ABORT; 1812 cancel_flags |= XFS_TRANS_ABORT;
1892 out_trans_cancel: 1813 out_trans_cancel:
1893 xfs_trans_cancel(tp, cancel_flags); 1814 xfs_trans_cancel(tp, cancel_flags);
1894 goto std_return; 1815 std_return:
1816 return error;
1895} 1817}
1896 1818
1897int 1819int
@@ -1909,25 +1831,13 @@ xfs_link(
1909 int committed; 1831 int committed;
1910 int resblks; 1832 int resblks;
1911 1833
1912 xfs_itrace_entry(tdp); 1834 trace_xfs_link(tdp, target_name);
1913 xfs_itrace_entry(sip);
1914 1835
1915 ASSERT(!S_ISDIR(sip->i_d.di_mode)); 1836 ASSERT(!S_ISDIR(sip->i_d.di_mode));
1916 1837
1917 if (XFS_FORCED_SHUTDOWN(mp)) 1838 if (XFS_FORCED_SHUTDOWN(mp))
1918 return XFS_ERROR(EIO); 1839 return XFS_ERROR(EIO);
1919 1840
1920 if (DM_EVENT_ENABLED(tdp, DM_EVENT_LINK)) {
1921 error = XFS_SEND_NAMESP(mp, DM_EVENT_LINK,
1922 tdp, DM_RIGHT_NULL,
1923 sip, DM_RIGHT_NULL,
1924 target_name->name, NULL, 0, 0, 0);
1925 if (error)
1926 return error;
1927 }
1928
1929 /* Return through std_return after this point. */
1930
1931 error = xfs_qm_dqattach(sip, 0); 1841 error = xfs_qm_dqattach(sip, 0);
1932 if (error) 1842 if (error)
1933 goto std_return; 1843 goto std_return;
@@ -1953,15 +1863,8 @@ xfs_link(
1953 1863
1954 xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); 1864 xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
1955 1865
1956 /* 1866 xfs_trans_ijoin_ref(tp, sip, XFS_ILOCK_EXCL);
1957 * Increment vnode ref counts since xfs_trans_commit & 1867 xfs_trans_ijoin_ref(tp, tdp, XFS_ILOCK_EXCL);
1958 * xfs_trans_cancel will both unlock the inodes and
1959 * decrement the associated ref counts.
1960 */
1961 IHOLD(sip);
1962 IHOLD(tdp);
1963 xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
1964 xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
1965 1868
1966 /* 1869 /*
1967 * If the source has too many links, we can't make any more to it. 1870 * If the source has too many links, we can't make any more to it.
@@ -2014,27 +1917,14 @@ xfs_link(
2014 goto abort_return; 1917 goto abort_return;
2015 } 1918 }
2016 1919
2017 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1920 return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
2018 if (error)
2019 goto std_return;
2020
2021 /* Fall through to std_return with error = 0. */
2022std_return:
2023 if (DM_EVENT_ENABLED(sip, DM_EVENT_POSTLINK)) {
2024 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTLINK,
2025 tdp, DM_RIGHT_NULL,
2026 sip, DM_RIGHT_NULL,
2027 target_name->name, NULL, 0, error, 0);
2028 }
2029 return error;
2030 1921
2031 abort_return: 1922 abort_return:
2032 cancel_flags |= XFS_TRANS_ABORT; 1923 cancel_flags |= XFS_TRANS_ABORT;
2033 /* FALLTHROUGH */
2034
2035 error_return: 1924 error_return:
2036 xfs_trans_cancel(tp, cancel_flags); 1925 xfs_trans_cancel(tp, cancel_flags);
2037 goto std_return; 1926 std_return:
1927 return error;
2038} 1928}
2039 1929
2040int 1930int
@@ -2074,7 +1964,7 @@ xfs_symlink(
2074 ip = NULL; 1964 ip = NULL;
2075 tp = NULL; 1965 tp = NULL;
2076 1966
2077 xfs_itrace_entry(dp); 1967 trace_xfs_symlink(dp, link_name);
2078 1968
2079 if (XFS_FORCED_SHUTDOWN(mp)) 1969 if (XFS_FORCED_SHUTDOWN(mp))
2080 return XFS_ERROR(EIO); 1970 return XFS_ERROR(EIO);
@@ -2086,17 +1976,6 @@ xfs_symlink(
2086 if (pathlen >= MAXPATHLEN) /* total string too long */ 1976 if (pathlen >= MAXPATHLEN) /* total string too long */
2087 return XFS_ERROR(ENAMETOOLONG); 1977 return XFS_ERROR(ENAMETOOLONG);
2088 1978
2089 if (DM_EVENT_ENABLED(dp, DM_EVENT_SYMLINK)) {
2090 error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dp,
2091 DM_RIGHT_NULL, NULL, DM_RIGHT_NULL,
2092 link_name->name,
2093 (unsigned char *)target_path, 0, 0, 0);
2094 if (error)
2095 return error;
2096 }
2097
2098 /* Return through std_return after this point. */
2099
2100 udqp = gdqp = NULL; 1979 udqp = gdqp = NULL;
2101 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 1980 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
2102 prid = dp->i_d.di_projid; 1981 prid = dp->i_d.di_projid;
@@ -2180,8 +2059,7 @@ xfs_symlink(
2180 * transaction cancel unlocking dp so don't do it explicitly in the 2059 * transaction cancel unlocking dp so don't do it explicitly in the
2181 * error path. 2060 * error path.
2182 */ 2061 */
2183 IHOLD(dp); 2062 xfs_trans_ijoin_ref(tp, dp, XFS_ILOCK_EXCL);
2184 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
2185 unlock_dp_on_error = B_FALSE; 2063 unlock_dp_on_error = B_FALSE;
2186 2064
2187 /* 2065 /*
@@ -2215,7 +2093,7 @@ xfs_symlink(
2215 error = xfs_bmapi(tp, ip, first_fsb, fs_blocks, 2093 error = xfs_bmapi(tp, ip, first_fsb, fs_blocks,
2216 XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, 2094 XFS_BMAPI_WRITE | XFS_BMAPI_METADATA,
2217 &first_block, resblks, mval, &nmaps, 2095 &first_block, resblks, mval, &nmaps,
2218 &free_list, NULL); 2096 &free_list);
2219 if (error) { 2097 if (error) {
2220 goto error1; 2098 goto error1;
2221 } 2099 }
@@ -2278,21 +2156,8 @@ xfs_symlink(
2278 xfs_qm_dqrele(udqp); 2156 xfs_qm_dqrele(udqp);
2279 xfs_qm_dqrele(gdqp); 2157 xfs_qm_dqrele(gdqp);
2280 2158
2281 /* Fall through to std_return with error = 0 or errno from 2159 *ipp = ip;
2282 * xfs_trans_commit */ 2160 return 0;
2283std_return:
2284 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTSYMLINK)) {
2285 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTSYMLINK,
2286 dp, DM_RIGHT_NULL,
2287 error ? NULL : ip,
2288 DM_RIGHT_NULL, link_name->name,
2289 (unsigned char *)target_path,
2290 0, error, 0);
2291 }
2292
2293 if (!error)
2294 *ipp = ip;
2295 return error;
2296 2161
2297 error2: 2162 error2:
2298 IRELE(ip); 2163 IRELE(ip);
@@ -2306,8 +2171,8 @@ std_return:
2306 2171
2307 if (unlock_dp_on_error) 2172 if (unlock_dp_on_error)
2308 xfs_iunlock(dp, XFS_ILOCK_EXCL); 2173 xfs_iunlock(dp, XFS_ILOCK_EXCL);
2309 2174 std_return:
2310 goto std_return; 2175 return error;
2311} 2176}
2312 2177
2313int 2178int
@@ -2333,13 +2198,12 @@ xfs_set_dmattrs(
2333 return error; 2198 return error;
2334 } 2199 }
2335 xfs_ilock(ip, XFS_ILOCK_EXCL); 2200 xfs_ilock(ip, XFS_ILOCK_EXCL);
2336 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 2201 xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
2337 2202
2338 ip->i_d.di_dmevmask = evmask; 2203 ip->i_d.di_dmevmask = evmask;
2339 ip->i_d.di_dmstate = state; 2204 ip->i_d.di_dmstate = state;
2340 2205
2341 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 2206 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
2342 IHOLD(ip);
2343 error = xfs_trans_commit(tp, 0); 2207 error = xfs_trans_commit(tp, 0);
2344 2208
2345 return error; 2209 return error;
@@ -2390,7 +2254,7 @@ xfs_alloc_file_space(
2390 int committed; 2254 int committed;
2391 int error; 2255 int error;
2392 2256
2393 xfs_itrace_entry(ip); 2257 trace_xfs_alloc_file_space(ip);
2394 2258
2395 if (XFS_FORCED_SHUTDOWN(mp)) 2259 if (XFS_FORCED_SHUTDOWN(mp))
2396 return XFS_ERROR(EIO); 2260 return XFS_ERROR(EIO);
@@ -2412,25 +2276,9 @@ xfs_alloc_file_space(
2412 startoffset_fsb = XFS_B_TO_FSBT(mp, offset); 2276 startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
2413 allocatesize_fsb = XFS_B_TO_FSB(mp, count); 2277 allocatesize_fsb = XFS_B_TO_FSB(mp, count);
2414 2278
2415 /* Generate a DMAPI event if needed. */
2416 if (alloc_type != 0 && offset < ip->i_size &&
2417 (attr_flags & XFS_ATTR_DMI) == 0 &&
2418 DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) {
2419 xfs_off_t end_dmi_offset;
2420
2421 end_dmi_offset = offset+len;
2422 if (end_dmi_offset > ip->i_size)
2423 end_dmi_offset = ip->i_size;
2424 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, offset,
2425 end_dmi_offset - offset, 0, NULL);
2426 if (error)
2427 return error;
2428 }
2429
2430 /* 2279 /*
2431 * Allocate file space until done or until there is an error 2280 * Allocate file space until done or until there is an error
2432 */ 2281 */
2433retry:
2434 while (allocatesize_fsb && !error) { 2282 while (allocatesize_fsb && !error) {
2435 xfs_fileoff_t s, e; 2283 xfs_fileoff_t s, e;
2436 2284
@@ -2488,8 +2336,7 @@ retry:
2488 if (error) 2336 if (error)
2489 goto error1; 2337 goto error1;
2490 2338
2491 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 2339 xfs_trans_ijoin(tp, ip);
2492 xfs_trans_ihold(tp, ip);
2493 2340
2494 /* 2341 /*
2495 * Issue the xfs_bmapi() call to allocate the blocks 2342 * Issue the xfs_bmapi() call to allocate the blocks
@@ -2498,7 +2345,7 @@ retry:
2498 error = xfs_bmapi(tp, ip, startoffset_fsb, 2345 error = xfs_bmapi(tp, ip, startoffset_fsb,
2499 allocatesize_fsb, bmapi_flag, 2346 allocatesize_fsb, bmapi_flag,
2500 &firstfsb, 0, imapp, &nimaps, 2347 &firstfsb, 0, imapp, &nimaps,
2501 &free_list, NULL); 2348 &free_list);
2502 if (error) { 2349 if (error) {
2503 goto error0; 2350 goto error0;
2504 } 2351 }
@@ -2527,17 +2374,6 @@ retry:
2527 startoffset_fsb += allocated_fsb; 2374 startoffset_fsb += allocated_fsb;
2528 allocatesize_fsb -= allocated_fsb; 2375 allocatesize_fsb -= allocated_fsb;
2529 } 2376 }
2530dmapi_enospc_check:
2531 if (error == ENOSPC && (attr_flags & XFS_ATTR_DMI) == 0 &&
2532 DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE)) {
2533 error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE,
2534 ip, DM_RIGHT_NULL,
2535 ip, DM_RIGHT_NULL,
2536 NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */
2537 if (error == 0)
2538 goto retry; /* Maybe DMAPI app. has made space */
2539 /* else fall through with error from XFS_SEND_DATA */
2540 }
2541 2377
2542 return error; 2378 return error;
2543 2379
@@ -2548,7 +2384,7 @@ error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
2548error1: /* Just cancel transaction */ 2384error1: /* Just cancel transaction */
2549 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 2385 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
2550 xfs_iunlock(ip, XFS_ILOCK_EXCL); 2386 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2551 goto dmapi_enospc_check; 2387 return error;
2552} 2388}
2553 2389
2554/* 2390/*
@@ -2598,7 +2434,7 @@ xfs_zero_remaining_bytes(
2598 offset_fsb = XFS_B_TO_FSBT(mp, offset); 2434 offset_fsb = XFS_B_TO_FSBT(mp, offset);
2599 nimap = 1; 2435 nimap = 1;
2600 error = xfs_bmapi(NULL, ip, offset_fsb, 1, 0, 2436 error = xfs_bmapi(NULL, ip, offset_fsb, 1, 0,
2601 NULL, 0, &imap, &nimap, NULL, NULL); 2437 NULL, 0, &imap, &nimap, NULL);
2602 if (error || nimap < 1) 2438 if (error || nimap < 1)
2603 break; 2439 break;
2604 ASSERT(imap.br_blockcount >= 1); 2440 ASSERT(imap.br_blockcount >= 1);
@@ -2661,7 +2497,6 @@ xfs_free_file_space(
2661{ 2497{
2662 int committed; 2498 int committed;
2663 int done; 2499 int done;
2664 xfs_off_t end_dmi_offset;
2665 xfs_fileoff_t endoffset_fsb; 2500 xfs_fileoff_t endoffset_fsb;
2666 int error; 2501 int error;
2667 xfs_fsblock_t firstfsb; 2502 xfs_fsblock_t firstfsb;
@@ -2680,7 +2515,7 @@ xfs_free_file_space(
2680 2515
2681 mp = ip->i_mount; 2516 mp = ip->i_mount;
2682 2517
2683 xfs_itrace_entry(ip); 2518 trace_xfs_free_file_space(ip);
2684 2519
2685 error = xfs_qm_dqattach(ip, 0); 2520 error = xfs_qm_dqattach(ip, 0);
2686 if (error) 2521 if (error)
@@ -2691,19 +2526,7 @@ xfs_free_file_space(
2691 return error; 2526 return error;
2692 rt = XFS_IS_REALTIME_INODE(ip); 2527 rt = XFS_IS_REALTIME_INODE(ip);
2693 startoffset_fsb = XFS_B_TO_FSB(mp, offset); 2528 startoffset_fsb = XFS_B_TO_FSB(mp, offset);
2694 end_dmi_offset = offset + len; 2529 endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
2695 endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset);
2696
2697 if (offset < ip->i_size && (attr_flags & XFS_ATTR_DMI) == 0 &&
2698 DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) {
2699 if (end_dmi_offset > ip->i_size)
2700 end_dmi_offset = ip->i_size;
2701 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip,
2702 offset, end_dmi_offset - offset,
2703 AT_DELAY_FLAG(attr_flags), NULL);
2704 if (error)
2705 return error;
2706 }
2707 2530
2708 if (attr_flags & XFS_ATTR_NOLOCK) 2531 if (attr_flags & XFS_ATTR_NOLOCK)
2709 need_iolock = 0; 2532 need_iolock = 0;
@@ -2731,7 +2554,7 @@ xfs_free_file_space(
2731 if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) { 2554 if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
2732 nimap = 1; 2555 nimap = 1;
2733 error = xfs_bmapi(NULL, ip, startoffset_fsb, 2556 error = xfs_bmapi(NULL, ip, startoffset_fsb,
2734 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); 2557 1, 0, NULL, 0, &imap, &nimap, NULL);
2735 if (error) 2558 if (error)
2736 goto out_unlock_iolock; 2559 goto out_unlock_iolock;
2737 ASSERT(nimap == 0 || nimap == 1); 2560 ASSERT(nimap == 0 || nimap == 1);
@@ -2746,7 +2569,7 @@ xfs_free_file_space(
2746 } 2569 }
2747 nimap = 1; 2570 nimap = 1;
2748 error = xfs_bmapi(NULL, ip, endoffset_fsb - 1, 2571 error = xfs_bmapi(NULL, ip, endoffset_fsb - 1,
2749 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); 2572 1, 0, NULL, 0, &imap, &nimap, NULL);
2750 if (error) 2573 if (error)
2751 goto out_unlock_iolock; 2574 goto out_unlock_iolock;
2752 ASSERT(nimap == 0 || nimap == 1); 2575 ASSERT(nimap == 0 || nimap == 1);
@@ -2814,8 +2637,7 @@ xfs_free_file_space(
2814 if (error) 2637 if (error)
2815 goto error1; 2638 goto error1;
2816 2639
2817 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 2640 xfs_trans_ijoin(tp, ip);
2818 xfs_trans_ihold(tp, ip);
2819 2641
2820 /* 2642 /*
2821 * issue the bunmapi() call to free the blocks 2643 * issue the bunmapi() call to free the blocks
@@ -2823,7 +2645,7 @@ xfs_free_file_space(
2823 xfs_bmap_init(&free_list, &firstfsb); 2645 xfs_bmap_init(&free_list, &firstfsb);
2824 error = xfs_bunmapi(tp, ip, startoffset_fsb, 2646 error = xfs_bunmapi(tp, ip, startoffset_fsb,
2825 endoffset_fsb - startoffset_fsb, 2647 endoffset_fsb - startoffset_fsb,
2826 0, 2, &firstfsb, &free_list, NULL, &done); 2648 0, 2, &firstfsb, &free_list, &done);
2827 if (error) { 2649 if (error) {
2828 goto error0; 2650 goto error0;
2829 } 2651 }
@@ -2883,8 +2705,6 @@ xfs_change_file_space(
2883 xfs_trans_t *tp; 2705 xfs_trans_t *tp;
2884 struct iattr iattr; 2706 struct iattr iattr;
2885 2707
2886 xfs_itrace_entry(ip);
2887
2888 if (!S_ISREG(ip->i_d.di_mode)) 2708 if (!S_ISREG(ip->i_d.di_mode))
2889 return XFS_ERROR(EINVAL); 2709 return XFS_ERROR(EINVAL);
2890 2710
@@ -2985,8 +2805,7 @@ xfs_change_file_space(
2985 2805
2986 xfs_ilock(ip, XFS_ILOCK_EXCL); 2806 xfs_ilock(ip, XFS_ILOCK_EXCL);
2987 2807
2988 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 2808 xfs_trans_ijoin(tp, ip);
2989 xfs_trans_ihold(tp, ip);
2990 2809
2991 if ((attr_flags & XFS_ATTR_DMI) == 0) { 2810 if ((attr_flags & XFS_ATTR_DMI) == 0) {
2992 ip->i_d.di_mode &= ~S_ISUID; 2811 ip->i_d.di_mode &= ~S_ISUID;